From 4341c4c2aca4842f9ef1ce27fa82d58b9f926cd2 Mon Sep 17 00:00:00 2001 From: Vaishnavi Hire Date: Mon, 10 Nov 2025 09:29:15 -0500 Subject: [PATCH 01/62] docs: Add Llama Stack Operator docs (#3983) # What does this PR do? Add documentation for llama-stack-k8s-operator under kubernetes deployment guide. Signed-off-by: Vaishnavi Hire --- docs/docs/deploying/kubernetes_deployment.mdx | 217 +++++++++++------- 1 file changed, 139 insertions(+), 78 deletions(-) diff --git a/docs/docs/deploying/kubernetes_deployment.mdx b/docs/docs/deploying/kubernetes_deployment.mdx index 8ed1e2756..48d08f0db 100644 --- a/docs/docs/deploying/kubernetes_deployment.mdx +++ b/docs/docs/deploying/kubernetes_deployment.mdx @@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem'; # Kubernetes Deployment Guide -Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS. +Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually. ## Prerequisites @@ -110,115 +110,176 @@ spec: EOF ``` -### Step 3: Configure Llama Stack +### Step 3: Install Kubernetes Operator -Update your run configuration: - -```yaml -providers: - inference: - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://vllm-server.default.svc.cluster.local:8000/v1 - max_tokens: 4096 - api_token: fake -``` - -Build container image: +Install the Llama Stack Kubernetes operator to manage Llama Stack deployments: ```bash -tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <-service`): + +```bash +# List services to find the service name +kubectl get services | grep llamastack + +# Port forward and test (replace SERVICE_NAME with the actual service name) +kubectl port-forward service/llamastack-vllm-service 8321:8321 +``` + +In another terminal, test the deployment: + +```bash +llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?" 
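# A successful run returns a chat completion from the vLLM-served model.
# As an extra sanity check (assumption: the stack exposes the OpenAI-compatible
# GET /v1/models route shown in the API spec), list the available models:
curl http://localhost:8321/v1/models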
```

## Troubleshooting

-**Check pod status:**
+### vLLM Server Issues
+
+**Check vLLM pod status:**
```bash
kubectl get pods -l app.kubernetes.io/name=vllm
kubectl logs -l app.kubernetes.io/name=vllm
```

-**Test service connectivity:**
+**Test vLLM service connectivity:**
```bash
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
```

+### Llama Stack Server Issues
+
+**Check LlamaStackDistribution status:**
+```bash
+# Get detailed status
+kubectl describe llamastackdistribution llamastack-vllm
+
+# Check for events
+kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm
+```
+
+**Check operator-managed pods:**
+```bash
+# List all pods managed by the operator
+kubectl get pods -l app.kubernetes.io/name=llama-stack
+
+# Check pod logs (the label selector matches all operator-managed pods)
+kubectl logs -l app.kubernetes.io/name=llama-stack
+```
+
+**Check operator status:**
+```bash
+# Verify the operator is running
+kubectl get pods -n llama-stack-operator-system
+
+# Check operator logs if issues persist
+kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager
+```
+
+**Verify service connectivity:**
+```bash
+# Get the service endpoint
+kubectl get svc llamastack-vllm-service
+
+# Test connectivity from within the cluster
+kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health
+```
+
## Related Resources

- **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
- **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
- **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
+- **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of the llama-stack Kubernetes operator
+- **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API spec of the llama-stack operator's custom resource

From d4ecbfd092a7502b4b3ffffbbc3df75c8c38862d Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Mon, 10 Nov 2025 10:16:35 -0800
Subject: [PATCH 02/62] fix(vector store)!: fix file content API (#4105)

# What does this PR do?
- changed to match
  https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml

## Test Plan
updated tests and CI
---
 client-sdks/stainless/openapi.yml           | 48 ++++++++-----------
 docs/static/llama-stack-spec.yaml           | 48 ++++++++-----------
 docs/static/stainless-llama-stack-spec.yaml | 48 ++++++++-----------
 src/llama_stack/apis/vector_io/vector_io.py | 24 +++++-----
 src/llama_stack/core/routers/vector_io.py   |  4 +-
 .../core/routing_tables/vector_stores.py    |  4 +-
 .../utils/memory/openai_vector_store_mixin.py | 15 +++---
 .../vector_io/test_openai_vector_stores.py  | 16 +++----
 8 files changed, 93 insertions(+), 114 deletions(-)

diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index d8159be62..adee2f086 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2916,11 +2916,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -10465,41 +10465,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. - VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ea7fd6eec..72600bf13 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2913,11 +2913,11 @@ paths: responses: '200': description: >- - A list of InterleavedContent representing the file contents. + A VectorStoreFileContentResponse representing the file contents. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -9749,41 +9749,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. 
- VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index d8159be62..adee2f086 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2916,11 +2916,11 @@ paths: responses: '200': description: >- - A list of InterleavedContent representing the file contents. + A VectorStoreFileContentResponse representing the file contents. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -10465,41 +10465,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. - VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. 
OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 26c961db3..846c6f191 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel): @json_schema_type -class VectorStoreFileContentsResponse(BaseModel): - """Response from retrieving the contents of a vector store file. +class VectorStoreFileContentResponse(BaseModel): + """Represents the parsed content of a vector store file. - :param file_id: Unique identifier for the file - :param filename: Name of the file - :param attributes: Key-value attributes associated with the file - :param content: List of content items from the file + :param object: The object type, which is always `vector_store.file_content.page` + :param data: Parsed content of the file + :param has_more: Indicates if there are more content pages to fetch + :param next_page: The token for the next page, if any """ - file_id: str - filename: str - attributes: dict[str, Any] - content: list[VectorStoreContent] + object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" + data: list[VectorStoreContent] + has_more: bool + next_page: str | None = None @json_schema_type @@ -732,12 +732,12 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. :param vector_store_id: The ID of the vector store containing the file to retrieve. :param file_id: The ID of the file to retrieve. - :returns: A list of InterleavedContent representing the file contents. + :returns: A VectorStoreFileContentResponse representing the file contents. """ ... 
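For reviewers, a minimal sketch of how a client consumes the reshaped response (assumptions: an OpenAI-compatible client pointed at a locally running stack, and hypothetical store/file IDs; it mirrors the updated integration test rather than adding any new API):

```python
from openai import OpenAI

# Assumption: a Llama Stack server with the vector_io API is running locally.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# "vs_123" and "file_456" are hypothetical IDs for a previously created
# vector store and an attached file.
page = client.vector_stores.files.content(
    file_id="file_456",
    vector_store_id="vs_123",
)

# The endpoint now returns a pagination-style page instead of file metadata:
# the payload's `object` is "vector_store.file_content.page", `data` holds the
# parsed chunks, and `has_more`/`next_page` are reserved for pagination.
for item in page.data:
    # llama-stack-client returns models; openai-python may return plain dicts.
    chunk = item if isinstance(item, dict) else item.model_dump()
    if chunk["type"] == "text":
        print(chunk["text"])
```

Note that `filename` and `attributes` are no longer part of this response; clients that need them should read the vector store file object itself.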
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index b54217619..9dac461db 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategyStaticConfig, VectorStoreDeleteResponse, VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFilesListInBatchResponse, @@ -338,7 +338,7 @@ class VectorIORouter(VectorIO): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index c6c80a01e..f95a4dbe3 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import ( SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, @@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index d047d9d12..86e6ea013 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import ( VectorStoreContent, VectorStoreDeleteResponse, VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileCounts, VectorStoreFileDeleteResponse, VectorStoreFileLastError, @@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: content.extend(self._chunk_to_vector_store_content(chunk)) - return VectorStoreFileContentsResponse( - file_id=file_id, - filename=file_info.get("filename", ""), - attributes=file_info.get("attributes", {}), - content=content, + return VectorStoreFileContentResponse( + object="vector_store.file_content.page", + data=content, + has_more=False, + next_page=None, ) async def openai_update_vector_store_file( diff --git 
a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 97ce4abe8..20f9d2978 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents( ) assert file_contents is not None - assert len(file_contents.content) == 1 - content = file_contents.content[0] + assert file_contents.object == "vector_store.file_content.page" + assert len(file_contents.data) == 1 + content = file_contents.data[0] # llama-stack-client returns a model, openai-python is a badboy and returns a dict if not isinstance(content, dict): content = content.model_dump() assert content["type"] == "text" assert content["text"] == test_content.decode("utf-8") - assert file_contents.filename == file_name - assert file_contents.attributes == attributes + assert file_contents.has_more is False @vector_provider_wrapper @@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents( ) assert file_contents is not None - assert file_contents.filename == file_data[i][0] - assert len(file_contents.content) > 0 + assert file_contents.object == "vector_store.file_content.page" + assert len(file_contents.data) > 0 # Verify the content matches what we uploaded content_text = ( - file_contents.content[0].text - if hasattr(file_contents.content[0], "text") - else file_contents.content[0]["text"] + file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"] ) assert file_data[i][1].decode("utf-8") in content_text From fadf17daf37c1518a5b05adf56bc0939453c0a6e Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 10 Nov 2025 10:36:33 -0800 Subject: [PATCH 03/62] feat(api)!: deprecate register/unregister resource APIs (#4099) Mark all register_* / unregister_* APIs as deprecated across models, shields, tool groups, datasets, benchmarks, and scoring functions. This is the first step toward moving resource mutations to an `/admin` namespace as outlined in https://github.com/llamastack/llama-stack/issues/3809#issuecomment-3492931585. The deprecation flag will be reflected in the OpenAPI schema to warn API users that these endpoints are being phased out. Next step will be implementing the `/admin` route namespace for these resource management operations. 
- `register_model` / `unregister_model` - `register_shield` / `unregister_shield` - `register_tool_group` / `unregister_toolgroup` - `register_dataset` / `unregister_dataset` - `register_benchmark` / `unregister_benchmark` - `register_scoring_function` / `unregister_scoring_function` --- client-sdks/stainless/openapi.yml | 603 ++------- docs/static/deprecated-llama-stack-spec.yaml | 1094 ++++++++++++++++- .../static/experimental-llama-stack-spec.yaml | 214 ++-- docs/static/llama-stack-spec.yaml | 389 +----- docs/static/stainless-llama-stack-spec.yaml | 603 ++------- src/llama_stack/apis/benchmarks/benchmarks.py | 4 +- src/llama_stack/apis/datasets/datasets.py | 4 +- src/llama_stack/apis/models/models.py | 4 +- .../scoring_functions/scoring_functions.py | 6 +- src/llama_stack/apis/shields/shields.py | 4 +- src/llama_stack/apis/tools/tools.py | 4 +- 11 files changed, 1454 insertions(+), 1475 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index adee2f086..2b9849535 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -998,39 +998,6 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false - post: - responses: - '200': - description: A Model. - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Register model. - description: >- - Register model. - - Register a model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterModelRequest' - required: true - deprecated: false /v1/models/{model_id}: get: responses: @@ -1065,36 +1032,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Unregister model. - description: >- - Unregister model. - - Unregister a model. - parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. - required: true - schema: - type: string - deprecated: false /v1/moderations: post: responses: @@ -1725,32 +1662,6 @@ paths: description: List all scoring functions. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Register a scoring function. - description: Register a scoring function. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true - deprecated: false /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1782,33 +1693,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Unregister a scoring function. - description: Unregister a scoring function. - parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string - deprecated: false /v1/scoring/score: post: responses: @@ -1897,36 +1781,6 @@ paths: description: List all shields. parameters: [] deprecated: false - post: - responses: - '200': - description: A Shield. - content: - application/json: - schema: - $ref: '#/components/schemas/Shield' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Register a shield. - description: Register a shield. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterShieldRequest' - required: true - deprecated: false /v1/shields/{identifier}: get: responses: @@ -1958,33 +1812,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Unregister a shield. - description: Unregister a shield. - parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -2080,32 +1907,6 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Register a tool group. - description: Register a tool group. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterToolGroupRequest' - required: true - deprecated: false /v1/toolgroups/{toolgroup_id}: get: responses: @@ -2137,32 +1938,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Unregister a tool group. - description: Unregister a tool group. 
- parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string - deprecated: false /v1/tools: get: responses: @@ -3171,7 +2946,7 @@ paths: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true - deprecated: false + deprecated: true /v1beta/datasets/{dataset_id}: get: responses: @@ -3228,7 +3003,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks: get: responses: @@ -3279,7 +3054,7 @@ paths: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -3336,7 +3111,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -6280,46 +6055,6 @@ components: required: - data title: OpenAIListModelsResponse - ModelType: - type: string - enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest Model: type: object properties: @@ -6377,6 +6112,15 @@ components: title: Model description: >- A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. RunModerationRequest: type: object properties: @@ -9115,61 +8859,6 @@ components: required: - data title: ListScoringFunctionsResponse - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. 
- return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest ScoreRequest: type: object properties: @@ -9345,35 +9034,6 @@ components: required: - data title: ListShieldsResponse - RegisterShieldRequest: - type: object - properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest InvokeToolRequest: type: object properties: @@ -9634,37 +9294,6 @@ components: title: ListToolGroupsResponse description: >- Response containing a list of tool groups. - RegisterToolGroupRequest: - type: object - properties: - toolgroup_id: - type: string - description: The ID of the tool group to register. - provider_id: - type: string - description: >- - The ID of the provider to use for the tool group. - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false - required: - - toolgroup_id - - provider_id - title: RegisterToolGroupRequest Chunk: type: object properties: @@ -10810,68 +10439,6 @@ components: - data title: ListDatasetsResponse description: Response from listing datasets. - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. 
How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest Benchmark: type: object properties: @@ -10939,47 +10506,6 @@ components: required: - data title: ListBenchmarksResponse - RegisterBenchmarkRequest: - type: object - properties: - benchmark_id: - type: string - description: The ID of the benchmark to register. - dataset_id: - type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false - required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest BenchmarkConfig: type: object properties: @@ -11841,6 +11367,109 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. 
How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 3bc965eb7..dea2e5bbe 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -13,7 +13,352 @@ info: migration reference only. servers: - url: http://any-hosted-llama-stack.com -paths: {} +paths: + /v1/models: + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. 
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: true + /v1/models/{model_id}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: true + /v1/scoring-functions: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true + /v1/scoring-functions/{scoring_fn_id}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: true + /v1/shields: + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: true + /v1/shields/{identifier}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. 
+ required: true + schema: + type: string + deprecated: true + /v1/toolgroups: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true + /v1/toolgroups/{toolgroup_id}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: true + /v1beta/datasets: + post: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Register a new dataset. + description: Register a new dataset. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterDatasetRequest' + required: true + deprecated: true + /v1beta/datasets/{dataset_id}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Unregister a dataset by its ID. + description: Unregister a dataset by its ID. + parameters: + - name: dataset_id + in: path + description: The ID of the dataset to unregister. + required: true + schema: + type: string + deprecated: true + /v1alpha/eval/benchmarks: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Register a benchmark. + description: Register a benchmark. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterBenchmarkRequest' + required: true + deprecated: true + /v1alpha/eval/benchmarks/{benchmark_id}: + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Unregister a benchmark. + description: Unregister a benchmark. 
+ parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to unregister. + required: true + schema: + type: string + deprecated: true jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -46,6 +391,730 @@ components: title: Error description: >- Error response from the API. Roughly follows RFC 7807. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + AggregationFunctionType: + type: string + enum: + - average + - weighted_average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: >- + Types of aggregation functions for scoring results. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. + BasicScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: basic + default: basic + description: >- + The type of scoring function parameters, always basic + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - aggregation_functions + title: BasicScoringFnParams + description: >- + Parameters for basic scoring function configuration. 
+ BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + LLMAsJudgeScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: llm_as_judge + default: llm_as_judge + description: >- + The type of scoring function parameters, always llm_as_judge + judge_model: + type: string + description: >- + Identifier of the LLM model to use as a judge for scoring + prompt_template: + type: string + description: >- + (Optional) Custom prompt template for the judge model + judge_score_regexes: + type: array + items: + type: string + description: >- + Regexes to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - judge_model + - judge_score_regexes + - aggregation_functions + title: LLMAsJudgeScoringFnParams + description: >- + Parameters for LLM-as-judge scoring function configuration. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. 
+ ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + RegexParserScoringFnParams: + type: object + properties: + type: + $ref: '#/components/schemas/ScoringFnParamsType' + const: regex_parser + default: regex_parser + description: >- + The type of scoring function parameters, always regex_parser + parsing_regexes: + type: array + items: + type: string + description: >- + Regex to extract the answer from generated response + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + description: >- + Aggregation functions to apply to the scores of each row + additionalProperties: false + required: + - type + - parsing_regexes + - aggregation_functions + title: RegexParserScoringFnParams + description: >- + Parameters for regex parser scoring function configuration. + ScoringFnParams: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + - $ref: '#/components/schemas/RegexParserScoringFnParams' + - $ref: '#/components/schemas/BasicScoringFnParams' + discriminator: + propertyName: type + mapping: + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + basic: '#/components/schemas/BasicScoringFnParams' + ScoringFnParamsType: + type: string + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType + description: >- + Types of scoring function parameter configurations. + StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. 
+ params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. + URL: + type: object + properties: + uri: + type: string + description: The URL string pointing to the resource + additionalProperties: false + required: + - uri + title: URL + description: A URL reference to external content. + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. 
+ URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. 
+ additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + Dataset: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_store + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: dataset + default: dataset + description: >- + Type of resource, always 'dataset' for datasets + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + Purpose of the dataset indicating its intended use + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + description: >- + Data source configuration for the dataset + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. 
+ additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest responses: BadRequest400: description: The request was invalid or malformed @@ -93,4 +1162,25 @@ components: detail: An unexpected error occurred security: - Default: [] -tags: [] +tags: + - name: Benchmarks + description: '' + - name: Datasets + description: '' + - name: Models + description: '' + - name: ScoringFunctions + description: '' + - name: Shields + description: '' + - name: ToolGroups + description: '' +x-tagGroups: + - name: Operations + tags: + - Benchmarks + - Datasets + - Models + - ScoringFunctions + - Shields + - ToolGroups diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 68e2f59be..6f379d17c 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -162,7 +162,7 @@ paths: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true - deprecated: false + deprecated: true /v1beta/datasets/{dataset_id}: get: responses: @@ -219,7 +219,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks: get: responses: @@ -270,7 +270,7 @@ paths: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -327,7 +327,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -936,68 +936,6 @@ components: - data title: ListDatasetsResponse description: Response from listing datasets. - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. 
Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest Benchmark: type: object properties: @@ -1065,47 +1003,6 @@ components: required: - data title: ListBenchmarksResponse - RegisterBenchmarkRequest: - type: object - properties: - benchmark_id: - type: string - description: The ID of the benchmark to register. - dataset_id: - type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false - required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest AggregationFunctionType: type: string enum: @@ -2254,6 +2151,109 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. 
Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 72600bf13..4680afac9 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -995,39 +995,6 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false - post: - responses: - '200': - description: A Model. - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Register model. - description: >- - Register model. - - Register a model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterModelRequest' - required: true - deprecated: false /v1/models/{model_id}: get: responses: @@ -1062,36 +1029,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Unregister model. - description: >- - Unregister model. - - Unregister a model. - parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. 
- required: true - schema: - type: string - deprecated: false /v1/moderations: post: responses: @@ -1722,32 +1659,6 @@ paths: description: List all scoring functions. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Register a scoring function. - description: Register a scoring function. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true - deprecated: false /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1779,33 +1690,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Unregister a scoring function. - description: Unregister a scoring function. - parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string - deprecated: false /v1/scoring/score: post: responses: @@ -1894,36 +1778,6 @@ paths: description: List all shields. parameters: [] deprecated: false - post: - responses: - '200': - description: A Shield. - content: - application/json: - schema: - $ref: '#/components/schemas/Shield' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Register a shield. - description: Register a shield. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterShieldRequest' - required: true - deprecated: false /v1/shields/{identifier}: get: responses: @@ -1955,33 +1809,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Unregister a shield. - description: Unregister a shield. - parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -2077,32 +1904,6 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Register a tool group. - description: Register a tool group. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterToolGroupRequest' - required: true - deprecated: false /v1/toolgroups/{toolgroup_id}: get: responses: @@ -2134,32 +1935,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Unregister a tool group. - description: Unregister a tool group. - parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string - deprecated: false /v1/tools: get: responses: @@ -5564,46 +5339,6 @@ components: required: - data title: OpenAIListModelsResponse - ModelType: - type: string - enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest Model: type: object properties: @@ -5661,6 +5396,15 @@ components: title: Model description: >- A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. RunModerationRequest: type: object properties: @@ -8399,61 +8143,6 @@ components: required: - data title: ListScoringFunctionsResponse - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. 
- return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest ScoreRequest: type: object properties: @@ -8629,35 +8318,6 @@ components: required: - data title: ListShieldsResponse - RegisterShieldRequest: - type: object - properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest InvokeToolRequest: type: object properties: @@ -8918,37 +8578,6 @@ components: title: ListToolGroupsResponse description: >- Response containing a list of tool groups. - RegisterToolGroupRequest: - type: object - properties: - toolgroup_id: - type: string - description: The ID of the tool group to register. - provider_id: - type: string - description: >- - The ID of the provider to use for the tool group. - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false - required: - - toolgroup_id - - provider_id - title: RegisterToolGroupRequest Chunk: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index adee2f086..2b9849535 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -998,39 +998,6 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false - post: - responses: - '200': - description: A Model. - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Register model. - description: >- - Register model. - - Register a model. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterModelRequest' - required: true - deprecated: false /v1/models/{model_id}: get: responses: @@ -1065,36 +1032,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Unregister model. - description: >- - Unregister model. - - Unregister a model. - parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. - required: true - schema: - type: string - deprecated: false /v1/moderations: post: responses: @@ -1725,32 +1662,6 @@ paths: description: List all scoring functions. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Register a scoring function. - description: Register a scoring function. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true - deprecated: false /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1782,33 +1693,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - summary: Unregister a scoring function. - description: Unregister a scoring function. - parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string - deprecated: false /v1/scoring/score: post: responses: @@ -1897,36 +1781,6 @@ paths: description: List all shields. parameters: [] deprecated: false - post: - responses: - '200': - description: A Shield. - content: - application/json: - schema: - $ref: '#/components/schemas/Shield' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Register a shield. - description: Register a shield. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterShieldRequest' - required: true - deprecated: false /v1/shields/{identifier}: get: responses: @@ -1958,33 +1812,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Shields - summary: Unregister a shield. - description: Unregister a shield. 
- parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -2080,32 +1907,6 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Register a tool group. - description: Register a tool group. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterToolGroupRequest' - required: true - deprecated: false /v1/toolgroups/{toolgroup_id}: get: responses: @@ -2137,32 +1938,6 @@ paths: schema: type: string deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolGroups - summary: Unregister a tool group. - description: Unregister a tool group. - parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string - deprecated: false /v1/tools: get: responses: @@ -3171,7 +2946,7 @@ paths: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true - deprecated: false + deprecated: true /v1beta/datasets/{dataset_id}: get: responses: @@ -3228,7 +3003,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks: get: responses: @@ -3279,7 +3054,7 @@ paths: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -3336,7 +3111,7 @@ paths: required: true schema: type: string - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -6280,46 +6055,6 @@ components: required: - data title: OpenAIListModelsResponse - ModelType: - type: string - enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest Model: type: object properties: @@ -6377,6 +6112,15 @@ components: title: Model description: >- A model resource representing an AI model registered in Llama Stack. 
+ ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. RunModerationRequest: type: object properties: @@ -9115,61 +8859,6 @@ components: required: - data title: ListScoringFunctionsResponse - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest ScoreRequest: type: object properties: @@ -9345,35 +9034,6 @@ components: required: - data title: ListShieldsResponse - RegisterShieldRequest: - type: object - properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest InvokeToolRequest: type: object properties: @@ -9634,37 +9294,6 @@ components: title: ListToolGroupsResponse description: >- Response containing a list of tool groups. - RegisterToolGroupRequest: - type: object - properties: - toolgroup_id: - type: string - description: The ID of the tool group to register. - provider_id: - type: string - description: >- - The ID of the provider to use for the tool group. - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. 
- args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false - required: - - toolgroup_id - - provider_id - title: RegisterToolGroupRequest Chunk: type: object properties: @@ -10810,68 +10439,6 @@ components: - data title: ListDatasetsResponse description: Response from listing datasets. - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest Benchmark: type: object properties: @@ -10939,47 +10506,6 @@ components: required: - data title: ListBenchmarksResponse - RegisterBenchmarkRequest: - type: object - properties: - benchmark_id: - type: string - description: The ID of the benchmark to register. - dataset_id: - type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. 
- metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false - required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest BenchmarkConfig: type: object properties: @@ -11841,6 +11367,109 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest responses: BadRequest400: description: The request was invalid or malformed diff --git a/src/llama_stack/apis/benchmarks/benchmarks.py b/src/llama_stack/apis/benchmarks/benchmarks.py index 933205489..9a67269c3 100644 --- a/src/llama_stack/apis/benchmarks/benchmarks.py +++ b/src/llama_stack/apis/benchmarks/benchmarks.py @@ -74,7 +74,7 @@ class Benchmarks(Protocol): """ ... - @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA) + @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) async def register_benchmark( self, benchmark_id: str, @@ -95,7 +95,7 @@ class Benchmarks(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) async def unregister_benchmark(self, benchmark_id: str) -> None: """Unregister a benchmark. diff --git a/src/llama_stack/apis/datasets/datasets.py b/src/llama_stack/apis/datasets/datasets.py index ed4ecec22..9bedc6209 100644 --- a/src/llama_stack/apis/datasets/datasets.py +++ b/src/llama_stack/apis/datasets/datasets.py @@ -146,7 +146,7 @@ class ListDatasetsResponse(BaseModel): class Datasets(Protocol): - @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA) + @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True) async def register_dataset( self, purpose: DatasetPurpose, @@ -235,7 +235,7 @@ class Datasets(Protocol): """ ... - @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA) + @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True) async def unregister_dataset( self, dataset_id: str, diff --git a/src/llama_stack/apis/models/models.py b/src/llama_stack/apis/models/models.py index 5c976886c..bbb359b51 100644 --- a/src/llama_stack/apis/models/models.py +++ b/src/llama_stack/apis/models/models.py @@ -136,7 +136,7 @@ class Models(Protocol): """ ... - @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def register_model( self, model_id: str, @@ -158,7 +158,7 @@ class Models(Protocol): """ ... - @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) async def unregister_model( self, model_id: str, diff --git a/src/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama_stack/apis/scoring_functions/scoring_functions.py index fe49723ab..78f4a7541 100644 --- a/src/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/src/llama_stack/apis/scoring_functions/scoring_functions.py @@ -178,7 +178,7 @@ class ScoringFunctions(Protocol): """ ... 
- @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def register_scoring_function( self, scoring_fn_id: str, @@ -199,7 +199,9 @@ class ScoringFunctions(Protocol): """ ... - @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod( + route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True + ) async def unregister_scoring_function(self, scoring_fn_id: str) -> None: """Unregister a scoring function. diff --git a/src/llama_stack/apis/shields/shields.py b/src/llama_stack/apis/shields/shields.py index ca4483828..659ba8b75 100644 --- a/src/llama_stack/apis/shields/shields.py +++ b/src/llama_stack/apis/shields/shields.py @@ -67,7 +67,7 @@ class Shields(Protocol): """ ... - @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def register_shield( self, shield_id: str, @@ -85,7 +85,7 @@ class Shields(Protocol): """ ... - @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) async def unregister_shield(self, identifier: str) -> None: """Unregister a shield. diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py index c9bdfcfb6..4e7cf2544 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama_stack/apis/tools/tools.py @@ -109,7 +109,7 @@ class ListToolDefsResponse(BaseModel): @runtime_checkable @telemetry_traceable class ToolGroups(Protocol): - @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def register_tool_group( self, toolgroup_id: str, @@ -167,7 +167,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) + @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) async def unregister_toolgroup( self, toolgroup_id: str, From 209a78b618f5e71b1ff384ba9877c815950ac8e1 Mon Sep 17 00:00:00 2001 From: Dennis Kennetz Date: Mon, 10 Nov 2025 15:16:24 -0600 Subject: [PATCH 04/62] feat: add oci genai service as chat inference provider (#3876) # What does this PR do? Adds OCI GenAI PaaS models for openai chat completion endpoints. ## Test Plan In an OCI tenancy with access to GenAI PaaS, perform the following steps: 1. Ensure you have IAM policies in place to use service (check docs included in this PR) 2. For local development, [setup OCI cli](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm) and configure the CLI with your region, tenancy, and auth [here](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliconfigure.htm) 3. Once configured, go through llama-stack setup and run llama-stack (uses config based auth) like: ```bash OCI_AUTH_TYPE=config_file \ OCI_CLI_PROFILE=CHICAGO \ OCI_REGION=us-chicago-1 \ OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..aaaaaaaa5...5a \ llama stack run oci ``` 4. Hit the `models` endpoint to list models after server is running: ```bash curl http://localhost:8321/v1/models | jq ... 
{ "identifier": "meta.llama-4-scout-17b-16e-instruct", "provider_resource_id": "ocid1.generativeaimodel.oc1.us-chicago-1.am...q", "provider_id": "oci", "type": "model", "metadata": { "display_name": "meta.llama-4-scout-17b-16e-instruct", "capabilities": [ "CHAT" ], "oci_model_id": "ocid1.generativeaimodel.oc1.us-chicago-1.a...q" }, "model_type": "llm" }, ... ``` 5. Use the "display_name" field to use the model in a `/chat/completions` request: ```bash # Streaming result curl -X POST http://localhost:8321/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "meta.llama-4-scout-17b-16e-instruct", "stream": true, "temperature": 0.9, "messages": [ { "role": "system", "content": "You are a funny comedian. You can be crass." }, { "role": "user", "content": "Tell me a funny joke about programming." } ] }' # Non-streaming result curl -X POST http://localhost:8321/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "meta.llama-4-scout-17b-16e-instruct", "stream": false, "temperature": 0.9, "messages": [ { "role": "system", "content": "You are a funny comedian. You can be crass." }, { "role": "user", "content": "Tell me a funny joke about programming." } ] }' ``` 6. Try out other models from the `/models` endpoint. --- .../distributions/remote_hosted_distro/oci.md | 143 ++++++++++++++++++ docs/docs/providers/inference/remote_oci.mdx | 41 +++++ pyproject.toml | 1 + src/llama_stack/distributions/oci/__init__.py | 7 + src/llama_stack/distributions/oci/build.yaml | 35 +++++ .../distributions/oci/doc_template.md | 140 +++++++++++++++++ src/llama_stack/distributions/oci/oci.py | 108 +++++++++++++ src/llama_stack/distributions/oci/run.yaml | 136 +++++++++++++++++ .../providers/registry/inference.py | 14 ++ .../remote/inference/oci/__init__.py | 17 +++ .../providers/remote/inference/oci/auth.py | 79 ++++++++++ .../providers/remote/inference/oci/config.py | 75 +++++++++ .../providers/remote/inference/oci/oci.py | 140 +++++++++++++++++ .../inference/test_openai_completion.py | 1 + .../inference/test_openai_embeddings.py | 1 + 15 files changed, 938 insertions(+) create mode 100644 docs/docs/distributions/remote_hosted_distro/oci.md create mode 100644 docs/docs/providers/inference/remote_oci.mdx create mode 100644 src/llama_stack/distributions/oci/__init__.py create mode 100644 src/llama_stack/distributions/oci/build.yaml create mode 100644 src/llama_stack/distributions/oci/doc_template.md create mode 100644 src/llama_stack/distributions/oci/oci.py create mode 100644 src/llama_stack/distributions/oci/run.yaml create mode 100644 src/llama_stack/providers/remote/inference/oci/__init__.py create mode 100644 src/llama_stack/providers/remote/inference/oci/auth.py create mode 100644 src/llama_stack/providers/remote/inference/oci/config.py create mode 100644 src/llama_stack/providers/remote/inference/oci/oci.py diff --git a/docs/docs/distributions/remote_hosted_distro/oci.md b/docs/docs/distributions/remote_hosted_distro/oci.md new file mode 100644 index 000000000..b13cf5f73 --- /dev/null +++ b/docs/docs/distributions/remote_hosted_distro/oci.md @@ -0,0 +1,143 @@ +--- +orphan: true +--- + +# OCI Distribution + +The `llamastack/distribution-oci` distribution consists of the following provider configurations. 
+
+| API | Provider(s) |
+|-----|-------------|
+| agents | `inline::meta-reference` |
+| datasetio | `remote::huggingface`, `inline::localfs` |
+| eval | `inline::meta-reference` |
+| files | `inline::localfs` |
+| inference | `remote::oci` |
+| safety | `inline::llama-guard` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
+
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+- `OCI_AUTH_TYPE`: OCI authentication type (instance_principal or config_file) (default: `instance_principal`)
+- `OCI_REGION`: OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1) (default: ``)
+- `OCI_COMPARTMENT_OCID`: OCI compartment ID for the Generative AI service (default: ``)
+- `OCI_CONFIG_FILE_PATH`: OCI config file path (required if OCI_AUTH_TYPE is config_file) (default: `~/.oci/config`)
+- `OCI_CLI_PROFILE`: OCI CLI profile name to use from config file (default: `DEFAULT`)
+
+
+## Prerequisites
+### Oracle Cloud Infrastructure Setup
+
+Before using the OCI Generative AI distribution, ensure you have:
+
+1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
+2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
+3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
+4. **Authentication**: Configure authentication using either:
+   - **Instance Principal** (recommended for cloud-hosted deployments)
+   - **API Key** (for on-premises or development environments)
+
+### Authentication Methods
+
+#### Instance Principal Authentication (Recommended)
+Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
+
+Requirements:
+- Instance must be running in an Oracle Cloud Infrastructure compartment
+- Instance must have appropriate IAM policies to access Generative AI services
+
+#### API Key Authentication
+For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
+
+### Required IAM Policies
+
+Ensure your OCI user or instance has the following policy statements, replacing `<group-name>` and `<compartment-name>` with your own values:
+
+```
+Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
+Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
+```
+
+## Supported Services
+
+### Inference: OCI Generative AI
+Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
+
+- **Chat Completions**: Conversational AI with context awareness
+- **Text Generation**: Complete prompts and generate text content
+
+#### Available Models
+OCI Generative AI provides access to models from Meta, Cohere, OpenAI, Grok, and more.
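+
+As a quick sanity check (a minimal sketch, assuming the stack is already running on the default port 8321), you can list the model identifiers the provider registered; the `.data[].identifier` path assumes the standard list response shape shown in the test plan above:
+
+```bash
+# List registered model identifiers (adjust host/port if you changed the defaults)
+curl -s http://localhost:8321/v1/models | jq -r '.data[].identifier'
+```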
+
+### Safety: Llama Guard
+For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
+- Content filtering and moderation
+- Policy compliance checking
+- Harmful content detection
+
+### Vector Storage: Multiple Options
+The distribution supports several vector storage providers:
+- **FAISS**: Local in-memory vector search
+- **ChromaDB**: Distributed vector database
+- **PGVector**: PostgreSQL with vector extensions
+
+### Additional Services
+- **Dataset I/O**: Local filesystem and Hugging Face integration
+- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
+- **Evaluation**: Meta reference evaluation framework
+
+## Running Llama Stack with OCI
+
+You can run the OCI distribution in a local virtual environment.
+
+### Via venv
+
+If you've set up your local development environment, you can run the stack directly from your local virtual environment.
+
+```bash
+OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
+```
+
+### Configuration Examples
+
+#### Using Instance Principal (Recommended for Production)
+```bash
+export OCI_AUTH_TYPE=instance_principal
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..<your-compartment-id>
+```
+
+#### Using API Key Authentication (Development)
+```bash
+export OCI_AUTH_TYPE=config_file
+export OCI_CONFIG_FILE_PATH=~/.oci/config
+export OCI_CLI_PROFILE=DEFAULT
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+## Regional Endpoints
+
+OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4. **Region Unavailable**: Verify the specified region supports Generative AI services
+
+### Getting Help
+
+For additional support:
+- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
+- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
diff --git a/docs/docs/providers/inference/remote_oci.mdx b/docs/docs/providers/inference/remote_oci.mdx
new file mode 100644
index 000000000..33a201a55
--- /dev/null
+++ b/docs/docs/providers/inference/remote_oci.mdx
@@ -0,0 +1,41 @@
+---
+description: |
+  Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
+  Provider documentation
+  https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
+sidebar_label: Remote - Oci
+title: remote::oci
+---
+
+# remote::oci
+
+## Description
+
+
+Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
+
+Provider documentation
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
+
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
+| `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
+| `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
+| `oci_compartment_id` | `<class 'str'>` | No | | OCI compartment ID for the Generative AI service |
+| `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
+| `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
+
+## Sample Configuration
+
+```yaml
+oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
+oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
+oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
+oci_region: ${env.OCI_REGION:=us-ashburn-1}
+oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
+```
diff --git a/pyproject.toml b/pyproject.toml
index 4ec83249c..653c6d613 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -298,6 +298,7 @@ exclude = [
     "^src/llama_stack/providers/remote/agents/sample/",
     "^src/llama_stack/providers/remote/datasetio/huggingface/",
     "^src/llama_stack/providers/remote/datasetio/nvidia/",
+    "^src/llama_stack/providers/remote/inference/oci/",
     "^src/llama_stack/providers/remote/inference/bedrock/",
     "^src/llama_stack/providers/remote/inference/nvidia/",
     "^src/llama_stack/providers/remote/inference/passthrough/",
diff --git a/src/llama_stack/distributions/oci/__init__.py b/src/llama_stack/distributions/oci/__init__.py
new file mode 100644
index 000000000..68c0efe44
--- /dev/null
+++ b/src/llama_stack/distributions/oci/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+ +from .oci import get_distribution_template # noqa: F401 diff --git a/src/llama_stack/distributions/oci/build.yaml b/src/llama_stack/distributions/oci/build.yaml new file mode 100644 index 000000000..7e082e1f6 --- /dev/null +++ b/src/llama_stack/distributions/oci/build.yaml @@ -0,0 +1,35 @@ +version: 2 +distribution_spec: + description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM + inference with scalable cloud services + providers: + inference: + - provider_type: remote::oci + vector_io: + - provider_type: inline::faiss + - provider_type: remote::chromadb + - provider_type: remote::pgvector + safety: + - provider_type: inline::llama-guard + agents: + - provider_type: inline::meta-reference + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs +image_type: venv +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/oci/doc_template.md b/src/llama_stack/distributions/oci/doc_template.md new file mode 100644 index 000000000..320530ccd --- /dev/null +++ b/src/llama_stack/distributions/oci/doc_template.md @@ -0,0 +1,140 @@ +--- +orphan: true +--- +# OCI Distribution + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. + +{{ providers_table }} + +{% if run_config_env_vars %} +### Environment Variables + +The following environment variables can be configured: + +{% for var, (default_value, description) in run_config_env_vars.items() %} +- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) +{% endfor %} +{% endif %} + +{% if default_models %} +### Models + +The following models are available by default: + +{% for model in default_models %} +- `{{ model.model_id }} {{ model.doc_string }}` +{% endfor %} +{% endif %} + +## Prerequisites +### Oracle Cloud Infrastructure Setup + +Before using the OCI Generative AI distribution, ensure you have: + +1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/) +2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy +3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models +4. **Authentication**: Configure authentication using either: + - **Instance Principal** (recommended for cloud-hosted deployments) + - **API Key** (for on-premises or development environments) + +### Authentication Methods + +#### Instance Principal Authentication (Recommended) +Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments. + +Requirements: +- Instance must be running in an Oracle Cloud Infrastructure compartment +- Instance must have appropriate IAM policies to access Generative AI services + +#### API Key Authentication +For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file. 
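+
+Before starting the stack, you can sanity-check that the key file and profile load correctly using the same OCI SDK calls the provider makes. A minimal sketch (assumes the default `~/.oci/config` location and the `DEFAULT` profile):
+
+```python
+import oci
+
+# Load the profile the provider will read and validate its required fields
+config = oci.config.from_file("~/.oci/config", "DEFAULT")
+oci.config.validate_config(config)
+print(f"Region from profile: {config['region']}")
+```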
+
+### Required IAM Policies
+
+Ensure your OCI user or instance has the following policy statements (`<group-name>` and `<compartment-name>` are placeholders for your own group and compartment):
+
+```
+Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
+Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
+```
+
+## Supported Services
+
+### Inference: OCI Generative AI
+Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
+
+- **Chat Completions**: Conversational AI with context awareness
+- **Text Generation**: Complete prompts and generate text content
+
+#### Available Models
+OCI Generative AI provides access to models from Meta, Cohere, OpenAI, xAI (Grok), and other providers; exact availability varies by region.
+
+### Safety: Llama Guard
+For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
+- Content filtering and moderation
+- Policy compliance checking
+- Harmful content detection
+
+### Vector Storage: Multiple Options
+The distribution supports several vector storage providers:
+- **FAISS**: Local in-memory vector search
+- **ChromaDB**: Distributed vector database
+- **PGVector**: PostgreSQL with vector extensions
+
+### Additional Services
+- **Dataset I/O**: Local filesystem and Hugging Face integration
+- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
+- **Evaluation**: Meta reference evaluation framework
+
+## Running Llama Stack with OCI
+
+You can run the OCI distribution via Docker or a local virtual environment.
+
+### Via venv
+
+If you've set up your local development environment, you can run the distribution from your local virtual environment.
+
+```bash
+OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
+```
+
+### Configuration Examples
+
+#### Using Instance Principal (Recommended for Production)
+```bash
+export OCI_AUTH_TYPE=instance_principal
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+#### Using API Key Authentication (Development)
+```bash
+export OCI_AUTH_TYPE=config_file
+export OCI_CONFIG_FILE_PATH=~/.oci/config
+export OCI_CLI_PROFILE=DEFAULT
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+## Regional Endpoints
+
+OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4. 
**Region Unavailable**: Verify the specified region supports Generative AI services + +### Getting Help + +For additional support: +- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm) +- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues) \ No newline at end of file diff --git a/src/llama_stack/distributions/oci/oci.py b/src/llama_stack/distributions/oci/oci.py new file mode 100644 index 000000000..1f21840f1 --- /dev/null +++ b/src/llama_stack/distributions/oci/oci.py @@ -0,0 +1,108 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pathlib import Path + +from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.remote.inference.oci.config import OCIConfig + + +def get_distribution_template(name: str = "oci") -> DistributionTemplate: + providers = { + "inference": [BuildProvider(provider_type="remote::oci")], + "vector_io": [ + BuildProvider(provider_type="inline::faiss"), + BuildProvider(provider_type="remote::chromadb"), + BuildProvider(provider_type="remote::pgvector"), + ], + "safety": [BuildProvider(provider_type="inline::llama-guard")], + "agents": [BuildProvider(provider_type="inline::meta-reference")], + "eval": [BuildProvider(provider_type="inline::meta-reference")], + "datasetio": [ + BuildProvider(provider_type="remote::huggingface"), + BuildProvider(provider_type="inline::localfs"), + ], + "scoring": [ + BuildProvider(provider_type="inline::basic"), + BuildProvider(provider_type="inline::llm-as-judge"), + BuildProvider(provider_type="inline::braintrust"), + ], + "tool_runtime": [ + BuildProvider(provider_type="remote::brave-search"), + BuildProvider(provider_type="remote::tavily-search"), + BuildProvider(provider_type="inline::rag-runtime"), + BuildProvider(provider_type="remote::model-context-protocol"), + ], + "files": [BuildProvider(provider_type="inline::localfs")], + } + + inference_provider = Provider( + provider_id="oci", + provider_type="remote::oci", + config=OCIConfig.sample_run_config(), + ) + + vector_io_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + + files_provider = Provider( + provider_id="meta-reference-files", + provider_type="inline::localfs", + config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ] + + return DistributionTemplate( + name=name, + distro_type="remote_hosted", + description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider], + "vector_io": [vector_io_provider], + "files": [files_provider], + }, + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + 
"OCI_AUTH_TYPE": ( + "instance_principal", + "OCI authentication type (instance_principal or config_file)", + ), + "OCI_REGION": ( + "", + "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)", + ), + "OCI_COMPARTMENT_OCID": ( + "", + "OCI compartment ID for the Generative AI service", + ), + "OCI_CONFIG_FILE_PATH": ( + "~/.oci/config", + "OCI config file path (required if OCI_AUTH_TYPE is config_file)", + ), + "OCI_CLI_PROFILE": ( + "DEFAULT", + "OCI CLI profile name to use from config file", + ), + }, + ) diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml new file mode 100644 index 000000000..e385ec606 --- /dev/null +++ b/src/llama_stack/distributions/oci/run.yaml @@ -0,0 +1,136 @@ +version: 2 +image_name: oci +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: oci + provider_type: remote::oci + config: + oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal} + oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config} + oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT} + oci_region: ${env.OCI_REGION:=us-ashburn-1} + oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 
+ num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search +server: + port: 8321 +telemetry: + enabled: true diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py index 1b70182fc..3cbfd408b 100644 --- a/src/llama_stack/providers/registry/inference.py +++ b/src/llama_stack/providers/registry/inference.py @@ -297,6 +297,20 @@ Available Models: Azure OpenAI inference provider for accessing GPT models and other Azure services. Provider documentation https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview +""", + ), + RemoteProviderSpec( + api=Api.inference, + provider_type="remote::oci", + adapter_type="oci", + pip_packages=["oci"], + module="llama_stack.providers.remote.inference.oci", + config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig", + provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator", + description=""" +Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models. +Provider documentation +https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm """, ), ] diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py new file mode 100644 index 000000000..280a8c1d2 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import InferenceProvider + +from .config import OCIConfig + + +async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider: + from .oci import OCIInferenceAdapter + + adapter = OCIInferenceAdapter(config=config) + await adapter.initialize() + return adapter diff --git a/src/llama_stack/providers/remote/inference/oci/auth.py b/src/llama_stack/providers/remote/inference/oci/auth.py new file mode 100644 index 000000000..f64436eb5 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/auth.py @@ -0,0 +1,79 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Generator, Mapping +from typing import Any, override + +import httpx +import oci +import requests +from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE + +OciAuthSigner = type[oci.signer.AbstractBaseSigner] + + +class HttpxOciAuth(httpx.Auth): + """ + Custom HTTPX authentication class that implements OCI request signing. + + This class handles the authentication flow for HTTPX requests by signing them + using the OCI Signer, which adds the necessary authentication headers for + OCI API calls. 
+ + Attributes: + signer (oci.signer.Signer): The OCI signer instance used for request signing + """ + + def __init__(self, signer: OciAuthSigner): + self.signer = signer + + @override + def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]: + # Read the request content to handle streaming requests properly + try: + content = request.content + except httpx.RequestNotRead: + # For streaming requests, we need to read the content first + content = request.read() + + req = requests.Request( + method=request.method, + url=str(request.url), + headers=dict(request.headers), + data=content, + ) + prepared_request = req.prepare() + + # Sign the request using the OCI Signer + self.signer.do_request_sign(prepared_request) # type: ignore + + # Update the original HTTPX request with the signed headers + request.headers.update(prepared_request.headers) + + yield request + + +class OciInstancePrincipalAuth(HttpxOciAuth): + def __init__(self, **kwargs: Mapping[str, Any]): + self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs) + + +class OciUserPrincipalAuth(HttpxOciAuth): + def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE): + config = oci.config.from_file(config_file, profile_name) + oci.config.validate_config(config) # type: ignore + key_content = "" + with open(config["key_file"]) as f: + key_content = f.read() + + self.signer = oci.signer.Signer( + tenancy=config["tenancy"], + user=config["user"], + fingerprint=config["fingerprint"], + private_key_file_location=config.get("key_file"), + pass_phrase="none", # type: ignore + private_key_content=key_content, + ) diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py new file mode 100644 index 000000000..9747b08ea --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/config.py @@ -0,0 +1,75 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import os +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.schema_utils import json_schema_type + + +class OCIProviderDataValidator(BaseModel): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + ) + oci_region: str = Field( + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str | None = Field( + default="~/.oci/config", + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str | None = Field( + default="DEFAULT", + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + +@json_schema_type +class OCIConfig(RemoteInferenceProviderConfig): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"), + ) + oci_region: str = Field( + default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"), + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""), + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str = Field( + default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"), + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str = Field( + default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"), + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + @classmethod + def sample_run_config( + cls, + oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}", + oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}", + oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}", + oci_region: str = "${env.OCI_REGION:=us-ashburn-1}", + oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}", + **kwargs, + ) -> dict[str, Any]: + return { + "oci_auth_type": oci_auth_type, + "oci_config_file_path": oci_config_file_path, + "oci_config_profile": oci_config_profile, + "oci_region": oci_region, + "oci_compartment_id": oci_compartment_id, + } diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py new file mode 100644 index 000000000..253dcf2b6 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from collections.abc import Iterable +from typing import Any + +import httpx +import oci +from oci.generative_ai.generative_ai_client import GenerativeAiClient +from oci.generative_ai.models import ModelCollection +from openai._base_client import DefaultAsyncHttpxClient + +from llama_stack.apis.inference.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) +from llama_stack.apis.models import ModelType +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth +from llama_stack.providers.remote.inference.oci.config import OCIConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin + +logger = get_logger(name=__name__, category="inference::oci") + +OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal" +OCI_AUTH_TYPE_CONFIG_FILE = "config_file" +VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE] +DEFAULT_OCI_REGION = "us-ashburn-1" + +MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"] + + +class OCIInferenceAdapter(OpenAIMixin): + config: OCIConfig + + async def initialize(self) -> None: + """Initialize and validate OCI configuration.""" + if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES: + raise ValueError( + f"Invalid OCI authentication type: {self.config.oci_auth_type}." + f"Valid types are one of: {VALID_OCI_AUTH_TYPES}" + ) + + if not self.config.oci_compartment_id: + raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.") + + def get_base_url(self) -> str: + region = self.config.oci_region or DEFAULT_OCI_REGION + return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1" + + def get_api_key(self) -> str | None: + # OCI doesn't use API keys, it uses request signing + return "" + + def get_extra_client_params(self) -> dict[str, Any]: + """ + Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers. + """ + auth = self._get_auth() + compartment_id = self.config.oci_compartment_id or "" + + return { + "http_client": DefaultAsyncHttpxClient( + auth=auth, + headers={ + "CompartmentId": compartment_id, + }, + ), + } + + def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None: + if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL: + return oci.auth.signers.InstancePrincipalsSecurityTokenSigner() + return None + + def _get_oci_config(self) -> dict: + if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL: + config = {"region": self.config.oci_region} + elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE: + config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile) + if not config.get("region"): + raise ValueError( + "Region not specified in config. Please specify in config or with OCI_REGION env variable." + ) + + return config + + def _get_auth(self) -> httpx.Auth: + if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL: + return OciInstancePrincipalAuth() + elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE: + return OciUserPrincipalAuth( + config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile + ) + else: + raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}") + + async def list_provider_model_ids(self) -> Iterable[str]: + """ + List available models from OCI Generative AI service. 
+ """ + oci_config = self._get_oci_config() + oci_signer = self._get_oci_signer() + compartment_id = self.config.oci_compartment_id or "" + + if oci_signer is None: + client = GenerativeAiClient(config=oci_config) + else: + client = GenerativeAiClient(config=oci_config, signer=oci_signer) + + models: ModelCollection = client.list_models( + compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE" + ).data + + seen_models = set() + model_ids = [] + for model in models.items: + if model.time_deprecated or model.time_on_demand_retired: + continue + + if "CHAT" not in model.capabilities or "FINE_TUNE" in model.capabilities: + continue + + # Use display_name + model_type as the key to avoid conflicts + model_key = (model.display_name, ModelType.llm) + if model_key in seen_models: + continue + + seen_models.add(model_key) + model_ids.append(model.display_name) + + return model_ids + + async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse: + # The constructed url is a mask that hits OCI's "chat" action, which is not supported for embeddings. + raise NotImplementedError("OCI Provider does not (currently) support embeddings") diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 1568ffbe2..4ce2850b4 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -54,6 +54,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id) # {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos, # or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}} "remote::groq", + "remote::oci", "remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404 "remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported "remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index 704775716..fe8070162 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -138,6 +138,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id): "remote::runpod", "remote::sambanova", "remote::tgi", + "remote::oci", ): pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.") From 433438cfc00f5c1aaa34f9545fc44c0c624247d0 Mon Sep 17 00:00:00 2001 From: Shabana Baig <43451943+s-akhtar-baig@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:21:27 -0500 Subject: [PATCH 05/62] feat: Implement the 'max_tool_calls' parameter for the Responses API (#4062) # Problem Responses API uses max_tool_calls parameter to limit the number of tool calls that can be generated in a response. Currently, LLS implementation of the Responses API does not support this parameter. # What does this PR do? This pull request adds the max_tool_calls field to the response object definition and updates the inline provider. 
it also ensures that: - the total number of calls to built-in and mcp tools do not exceed max_tool_calls - an error is thrown if max_tool_calls < 1 (behavior seen with the OpenAI Responses API, but we can change this if needed) Closes #[3563](https://github.com/llamastack/llama-stack/issues/3563) ## Test Plan - Tested manually for change in model response w.r.t supplied max_tool_calls field. - Added integration tests to test invalid max_tool_calls parameter. - Added integration tests to check max_tool_calls parameter with built-in and function tools. - Added integration tests to check max_tool_calls parameter in the returned response object. - Recorded OpenAI Responses API behavior using a sample script: https://github.com/s-akhtar-baig/llama-stack-examples/blob/main/responses/src/max_tool_calls.py Co-authored-by: Ashwin Bharambe --- client-sdks/stainless/openapi.yml | 15 ++ docs/static/llama-stack-spec.yaml | 15 ++ docs/static/stainless-llama-stack-spec.yaml | 15 ++ src/llama_stack/apis/agents/agents.py | 2 + .../apis/agents/openai_responses.py | 2 + .../inline/agents/meta_reference/agents.py | 2 + .../responses/openai_responses.py | 7 + .../meta_reference/responses/streaming.py | 18 +- .../agents/test_openai_responses.py | 166 ++++++++++++++++++ 9 files changed, 240 insertions(+), 2 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 2b9849535..58ebaa8ae 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -6626,6 +6626,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response input: type: array items: @@ -6984,6 +6989,11 @@ components: (Optional) Additional fields to include in the response. max_infer_iters: type: integer + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response. additionalProperties: false required: - input @@ -7065,6 +7075,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response additionalProperties: false required: - created_at diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 4680afac9..135ae910f 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5910,6 +5910,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response input: type: array items: @@ -6268,6 +6273,11 @@ components: (Optional) Additional fields to include in the response. max_infer_iters: type: integer + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response. 
additionalProperties: false required: - input @@ -6349,6 +6359,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response additionalProperties: false required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 2b9849535..58ebaa8ae 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -6626,6 +6626,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response input: type: array items: @@ -6984,6 +6989,11 @@ components: (Optional) Additional fields to include in the response. max_infer_iters: type: integer + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response. additionalProperties: false required: - input @@ -7065,6 +7075,11 @@ components: type: string description: >- (Optional) System message inserted into the model's context + max_tool_calls: + type: integer + description: >- + (Optional) Max number of total calls to built-in tools that can be processed + in a response additionalProperties: false required: - created_at diff --git a/src/llama_stack/apis/agents/agents.py b/src/llama_stack/apis/agents/agents.py index cadef2edc..09687ef33 100644 --- a/src/llama_stack/apis/agents/agents.py +++ b/src/llama_stack/apis/agents/agents.py @@ -87,6 +87,7 @@ class Agents(Protocol): "List of guardrails to apply during response generation. Guardrails provide safety and content moderation." ), ] = None, + max_tool_calls: int | None = None, ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a model response. @@ -97,6 +98,7 @@ class Agents(Protocol): :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation. :param include: (Optional) Additional fields to include in the response. :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications. + :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response. :returns: An OpenAIResponseObject. """ ... 
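Since the parameter rides on the OpenAI-compatible surface, a client can exercise it exactly as with OpenAI's Responses API. A minimal sketch mirroring the integration tests added below (assumes an OpenAI client pointed at a running Llama Stack server and a model that will invoke the built-in `web_search` tool; the endpoint and model id are placeholders):

```python
from openai import OpenAI

# Point an OpenAI-compatible client at a running Llama Stack server
# (assumed local endpoint; the API key is unused by a local stack).
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Cap server-executed (built-in and MCP) tool calls at one for this response.
response = client.responses.create(
    model="my-text-model",  # placeholder model id
    input="Search for today's top technology news.",
    tools=[{"type": "web_search"}],
    max_tool_calls=1,
)

# Expect at most one web_search_call item before the assistant message;
# the cap is echoed back on the response object.
print([item.type for item in response.output])
print(response.max_tool_calls)
```

Note that the cap applies to server-executed tools only; client-side function tool calls are not counted, as the function-tool test below demonstrates.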
diff --git a/src/llama_stack/apis/agents/openai_responses.py b/src/llama_stack/apis/agents/openai_responses.py index a38d1cba6..16657ab32 100644 --- a/src/llama_stack/apis/agents/openai_responses.py +++ b/src/llama_stack/apis/agents/openai_responses.py @@ -594,6 +594,7 @@ class OpenAIResponseObject(BaseModel): :param truncation: (Optional) Truncation strategy applied to the response :param usage: (Optional) Token usage information for the response :param instructions: (Optional) System message inserted into the model's context + :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response """ created_at: int @@ -615,6 +616,7 @@ class OpenAIResponseObject(BaseModel): truncation: str | None = None usage: OpenAIResponseUsage | None = None instructions: str | None = None + max_tool_calls: int | None = None @json_schema_type diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 7141d58bc..880e0b680 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -102,6 +102,7 @@ class MetaReferenceAgentsImpl(Agents): include: list[str] | None = None, max_infer_iters: int | None = 10, guardrails: list[ResponseGuardrail] | None = None, + max_tool_calls: int | None = None, ) -> OpenAIResponseObject: assert self.openai_responses_impl is not None, "OpenAI responses not initialized" result = await self.openai_responses_impl.create_openai_response( @@ -119,6 +120,7 @@ class MetaReferenceAgentsImpl(Agents): include, max_infer_iters, guardrails, + max_tool_calls, ) return result # type: ignore[no-any-return] diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 933cfe963..ed7f959c0 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -255,6 +255,7 @@ class OpenAIResponsesImpl: include: list[str] | None = None, max_infer_iters: int | None = 10, guardrails: list[str | ResponseGuardrailSpec] | None = None, + max_tool_calls: int | None = None, ): stream = bool(stream) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text @@ -270,6 +271,9 @@ class OpenAIResponsesImpl: if not conversation.startswith("conv_"): raise InvalidConversationIdError(conversation) + if max_tool_calls is not None and max_tool_calls < 1: + raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1") + stream_gen = self._create_streaming_response( input=input, conversation=conversation, @@ -282,6 +286,7 @@ class OpenAIResponsesImpl: tools=tools, max_infer_iters=max_infer_iters, guardrail_ids=guardrail_ids, + max_tool_calls=max_tool_calls, ) if stream: @@ -331,6 +336,7 @@ class OpenAIResponsesImpl: tools: list[OpenAIResponseInputTool] | None = None, max_infer_iters: int | None = 10, guardrail_ids: list[str] | None = None, + max_tool_calls: int | None = None, ) -> AsyncIterator[OpenAIResponseObjectStream]: # These should never be None when called from create_openai_response (which sets defaults) # but we assert here to help mypy understand the types @@ -373,6 +379,7 @@ class OpenAIResponsesImpl: safety_api=self.safety_api, guardrail_ids=guardrail_ids, instructions=instructions, + 
max_tool_calls=max_tool_calls, ) # Stream the response diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index ef5603420..c16bc8df3 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -115,6 +115,7 @@ class StreamingResponseOrchestrator: safety_api, guardrail_ids: list[str] | None = None, prompt: OpenAIResponsePrompt | None = None, + max_tool_calls: int | None = None, ): self.inference_api = inference_api self.ctx = ctx @@ -126,6 +127,10 @@ class StreamingResponseOrchestrator: self.safety_api = safety_api self.guardrail_ids = guardrail_ids or [] self.prompt = prompt + # System message that is inserted into the model's context + self.instructions = instructions + # Max number of total calls to built-in tools that can be processed in a response + self.max_tool_calls = max_tool_calls self.sequence_number = 0 # Store MCP tool mapping that gets built during tool processing self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ( @@ -139,8 +144,8 @@ class StreamingResponseOrchestrator: self.accumulated_usage: OpenAIResponseUsage | None = None # Track if we've sent a refusal response self.violation_detected = False - # system message that is inserted into the model's context - self.instructions = instructions + # Track total calls made to built-in tools + self.accumulated_builtin_tool_calls = 0 async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream: """Create a refusal response to replace streaming content.""" @@ -186,6 +191,7 @@ class StreamingResponseOrchestrator: usage=self.accumulated_usage, instructions=self.instructions, prompt=self.prompt, + max_tool_calls=self.max_tool_calls, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: @@ -894,6 +900,11 @@ class StreamingResponseOrchestrator: """Coordinate execution of both function and non-function tool calls.""" # Execute non-function tool calls for tool_call in non_function_tool_calls: + # Check if total calls made to built-in and mcp tools exceed max_tool_calls + if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls: + logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.") + break + # Find the item_id for this tool call matching_item_id = None for index, item_id in completion_result_data.tool_call_item_ids.items(): @@ -974,6 +985,9 @@ class StreamingResponseOrchestrator: if tool_response_message: next_turn_messages.append(tool_response_message) + # Track number of calls made to built-in and mcp tools + self.accumulated_builtin_tool_calls += 1 + # Execute function tool calls (client-side) for tool_call in function_tool_calls: # Find the item_id for this tool call from our tracking dictionary diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index d413d5201..057cee774 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -516,3 +516,169 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode # Verify instructions from previous response was not carried over to the next response assert response_with_instructions2.instructions == instructions2 + + +@pytest.mark.skip(reason="Tool 
calling is not reliable.") +def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id): + """Test handling of max_tool_calls with function tools in responses.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + max_tool_calls = 1 + + tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get weather information for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')", + }, + }, + }, + }, + { + "type": "function", + "name": "get_time", + "description": "Get current time for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')", + }, + }, + }, + }, + ] + + # First create a response that triggers function tools + response = client.responses.create( + model=text_model_id, + input="Can you tell me the weather in Paris and the current time?", + tools=tools, + stream=False, + max_tool_calls=max_tool_calls, + ) + + # Verify we got two function calls and that the max_tool_calls do not affect function tools + assert len(response.output) == 2 + assert response.output[0].type == "function_call" + assert response.output[0].name == "get_weather" + assert response.output[0].status == "completed" + assert response.output[1].type == "function_call" + assert response.output[1].name == "get_time" + assert response.output[0].status == "completed" + + # Verify we have a valid max_tool_calls field + assert response.max_tool_calls == max_tool_calls + + +def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id): + """Test handling of invalid max_tool_calls in responses.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + input = "Search for today's top technology news." + invalid_max_tool_calls = 0 + tools = [ + {"type": "web_search"}, + ] + + # Create a response with an invalid max_tool_calls value i.e. 0 + # Handle ValueError from LLS and BadRequestError from OpenAI client + with pytest.raises((ValueError, BadRequestError)) as excinfo: + client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=invalid_max_tool_calls, + ) + + error_message = str(excinfo.value) + assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, ( + f"Expected error message about invalid max_tool_calls, got: {error_message}" + ) + + +def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id): + """Test handling of max_tool_calls with built-in tools in responses.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls." 
+ max_tool_calls = [1, 5] + tools = [ + {"type": "web_search"}, + ] + + # First create a response that triggers web_search tools without max_tool_calls + response = client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + ) + + # Verify we got two web search calls followed by a message + assert len(response.output) == 3 + assert response.output[0].type == "web_search_call" + assert response.output[0].status == "completed" + assert response.output[1].type == "web_search_call" + assert response.output[1].status == "completed" + assert response.output[2].type == "message" + assert response.output[2].status == "completed" + assert response.output[2].role == "assistant" + + # Next create a response that triggers web_search tools with max_tool_calls set to 1 + response_2 = client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=max_tool_calls[0], + ) + + # Verify we got one web search tool call followed by a message + assert len(response_2.output) == 2 + assert response_2.output[0].type == "web_search_call" + assert response_2.output[0].status == "completed" + assert response_2.output[1].type == "message" + assert response_2.output[1].status == "completed" + assert response_2.output[1].role == "assistant" + + # Verify we have a valid max_tool_calls field + assert response_2.max_tool_calls == max_tool_calls[0] + + # Finally create a response that triggers web_search tools with max_tool_calls set to 5 + response_3 = client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=max_tool_calls[1], + ) + + # Verify we got two web search calls followed by a message + assert len(response_3.output) == 3 + assert response_3.output[0].type == "web_search_call" + assert response_3.output[0].status == "completed" + assert response_3.output[1].type == "web_search_call" + assert response_3.output[1].status == "completed" + assert response_3.output[2].type == "message" + assert response_3.output[2].status == "completed" + assert response_3.output[2].role == "assistant" + + # Verify we have a valid max_tool_calls field + assert response_3.max_tool_calls == max_tool_calls[1] From 43adc23ef641d84d183a47afa8a8653fc092f9f7 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 10 Nov 2025 18:29:24 -0500 Subject: [PATCH 06/62] refactor: remove dead inference API code and clean up imports (#4093) # What does this PR do? Delete ~2,000 lines of dead code from the old bespoke inference API that was replaced by OpenAI-only API. This includes removing unused type conversion functions, dead provider methods, and event_logger.py. Clean up imports across the codebase to remove references to deleted types. This eliminates unnecessary code and dependencies, helping isolate the API package as a self-contained module. This is the last interdependency between the .api package and "exterior" packages, meaning that now every other package in llama stack imports the API, not the other way around. ## Test Plan this is a structural change, no tests needed. 
--------- Signed-off-by: Charlie Doern --- .../apis/inference/event_logger.py | 43 - src/llama_stack/apis/inference/inference.py | 182 +--- src/llama_stack/core/routers/safety.py | 4 +- .../models/llama/llama3/generation.py | 4 +- .../models/llama/llama3/interface.py | 7 +- .../llama3/prompt_templates/system_prompts.py | 2 +- .../models/llama/llama3/tool_utils.py | 3 +- .../llama4/prompt_templates/system_prompts.py | 2 +- .../inference/meta_reference/generators.py | 98 +- .../inference/meta_reference/inference.py | 395 ++++++- .../meta_reference/model_parallel.py | 33 +- .../meta_reference/parallel_utils.py | 14 +- .../sentence_transformers.py | 4 - .../utils/inference/litellm_openai_mixin.py | 51 - .../utils/inference/openai_compat.py | 971 +----------------- .../utils/inference/prompt_adapter.py | 287 +----- tests/unit/models/test_prompt_adapter.py | 303 ------ .../providers/inline/inference/__init__.py | 5 + .../inline/inference/test_meta_reference.py | 44 + tests/unit/providers/nvidia/test_safety.py | 27 +- .../utils/inference/test_openai_compat.py | 220 ---- .../utils/inference/test_prompt_adapter.py | 35 + 22 files changed, 593 insertions(+), 2141 deletions(-) delete mode 100644 src/llama_stack/apis/inference/event_logger.py delete mode 100644 tests/unit/models/test_prompt_adapter.py create mode 100644 tests/unit/providers/inline/inference/__init__.py create mode 100644 tests/unit/providers/inline/inference/test_meta_reference.py delete mode 100644 tests/unit/providers/utils/inference/test_openai_compat.py create mode 100644 tests/unit/providers/utils/inference/test_prompt_adapter.py diff --git a/src/llama_stack/apis/inference/event_logger.py b/src/llama_stack/apis/inference/event_logger.py deleted file mode 100644 index d97ece6d4..000000000 --- a/src/llama_stack/apis/inference/event_logger.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from termcolor import cprint - -from llama_stack.apis.inference import ( - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, -) - - -class LogEvent: - def __init__( - self, - content: str = "", - end: str = "\n", - color="white", - ): - self.content = content - self.color = color - self.end = "\n" if end is None else end - - def print(self, flush=True): - cprint(f"{self.content}", color=self.color, end=self.end, flush=flush) - - -class EventLogger: - async def log(self, event_generator): - async for chunk in event_generator: - if isinstance(chunk, ChatCompletionResponseStreamChunk): - event = chunk.event - if event.event_type == ChatCompletionResponseEventType.start: - yield LogEvent("Assistant> ", color="cyan", end="") - elif event.event_type == ChatCompletionResponseEventType.progress: - yield LogEvent(event.delta, color="yellow", end="") - elif event.event_type == ChatCompletionResponseEventType.complete: - yield LogEvent("") - else: - yield LogEvent("Assistant> ", color="cyan", end="") - yield LogEvent(chunk.completion_message.content, color="yellow") diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py index 1a865ce5f..9f04917c9 100644 --- a/src/llama_stack/apis/inference/inference.py +++ b/src/llama_stack/apis/inference/inference.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from collections.abc import AsyncIterator -from enum import Enum +from enum import Enum, StrEnum from typing import ( Annotated, Any, @@ -15,28 +15,18 @@ from typing import ( ) from fastapi import Body -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field from typing_extensions import TypedDict -from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent -from llama_stack.apis.common.responses import MetricResponseMixin, Order +from llama_stack.apis.common.content_types import InterleavedContent +from llama_stack.apis.common.responses import ( + Order, +) from llama_stack.apis.common.tracing import telemetry_traceable from llama_stack.apis.models import Model from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.models.llama.datatypes import ( - BuiltinTool, - StopReason, - ToolCall, - ToolDefinition, - ToolPromptFormat, -) from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -register_schema(ToolCall) -register_schema(ToolDefinition) - -from enum import StrEnum - @json_schema_type class GreedySamplingStrategy(BaseModel): @@ -201,58 +191,6 @@ class ToolResponseMessage(BaseModel): content: InterleavedContent -@json_schema_type -class CompletionMessage(BaseModel): - """A message containing the model's (assistant) response in a chat conversation. - - :param role: Must be "assistant" to identify this as the model's response - :param content: The content of the model's response - :param stop_reason: Reason why the model stopped generating. Options are: - - `StopReason.end_of_turn`: The model finished generating the entire response. - - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - - `StopReason.out_of_tokens`: The model ran out of token budget. - :param tool_calls: List of tool calls. Each tool call is a ToolCall object. - """ - - role: Literal["assistant"] = "assistant" - content: InterleavedContent - stop_reason: StopReason - tool_calls: list[ToolCall] | None = Field(default_factory=lambda: []) - - -Message = Annotated[ - UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage, - Field(discriminator="role"), -] -register_schema(Message, name="Message") - - -@json_schema_type -class ToolResponse(BaseModel): - """Response from a tool invocation. - - :param call_id: Unique identifier for the tool call this response is for - :param tool_name: Name of the tool that was invoked - :param content: The response content from the tool - :param metadata: (Optional) Additional metadata about the tool response - """ - - call_id: str - tool_name: BuiltinTool | str - content: InterleavedContent - metadata: dict[str, Any] | None = None - - @field_validator("tool_name", mode="before") - @classmethod - def validate_field(cls, v): - if isinstance(v, str): - try: - return BuiltinTool(v) - except ValueError: - return v - return v - - class ToolChoice(Enum): """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model. @@ -289,22 +227,6 @@ class ChatCompletionResponseEventType(Enum): progress = "progress" -@json_schema_type -class ChatCompletionResponseEvent(BaseModel): - """An event during chat completion generation. - - :param event_type: Type of the event - :param delta: Content generated since last event. 
This can be one or more tokens, or a tool call. - :param logprobs: Optional log probabilities for generated tokens - :param stop_reason: Optional reason why generation stopped, if complete - """ - - event_type: ChatCompletionResponseEventType - delta: ContentDelta - logprobs: list[TokenLogProbs] | None = None - stop_reason: StopReason | None = None - - class ResponseFormatType(StrEnum): """Types of formats for structured (guided) decoding. @@ -357,34 +279,6 @@ class CompletionRequest(BaseModel): logprobs: LogProbConfig | None = None -@json_schema_type -class CompletionResponse(MetricResponseMixin): - """Response from a completion request. - - :param content: The generated completion text - :param stop_reason: Reason why generation stopped - :param logprobs: Optional log probabilities for generated tokens - """ - - content: str - stop_reason: StopReason - logprobs: list[TokenLogProbs] | None = None - - -@json_schema_type -class CompletionResponseStreamChunk(MetricResponseMixin): - """A chunk of a streamed completion response. - - :param delta: New content generated since last chunk. This can be one or more tokens. - :param stop_reason: Optional reason why generation stopped, if complete - :param logprobs: Optional log probabilities for generated tokens - """ - - delta: str - stop_reason: StopReason | None = None - logprobs: list[TokenLogProbs] | None = None - - class SystemMessageBehavior(Enum): """Config for how to override the default system prompt. @@ -398,70 +292,6 @@ class SystemMessageBehavior(Enum): replace = "replace" -@json_schema_type -class ToolConfig(BaseModel): - """Configuration for tool use. - - :param tool_choice: (Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto. - :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. - :param system_message_behavior: (Optional) Config for how to override the default system prompt. - - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string - '{{function_definitions}}' to indicate where the function definitions should be inserted. 
- """ - - tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto) - tool_prompt_format: ToolPromptFormat | None = Field(default=None) - system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append) - - def model_post_init(self, __context: Any) -> None: - if isinstance(self.tool_choice, str): - try: - self.tool_choice = ToolChoice[self.tool_choice] - except KeyError: - pass - - -# This is an internally used class -@json_schema_type -class ChatCompletionRequest(BaseModel): - model: str - messages: list[Message] - sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - - tools: list[ToolDefinition] | None = Field(default_factory=lambda: []) - tool_config: ToolConfig | None = Field(default_factory=ToolConfig) - - response_format: ResponseFormat | None = None - stream: bool | None = False - logprobs: LogProbConfig | None = None - - -@json_schema_type -class ChatCompletionResponseStreamChunk(MetricResponseMixin): - """A chunk of a streamed chat completion response. - - :param event: The event containing the new content - """ - - event: ChatCompletionResponseEvent - - -@json_schema_type -class ChatCompletionResponse(MetricResponseMixin): - """Response from a chat completion request. - - :param completion_message: The complete response message - :param logprobs: Optional log probabilities for generated tokens - """ - - completion_message: CompletionMessage - logprobs: list[TokenLogProbs] | None = None - - @json_schema_type class EmbeddingsResponse(BaseModel): """Response containing generated embeddings. diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index 79eac8b46..e5ff2ada9 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.apis.inference import Message +from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.apis.safety import RunShieldResponse, Safety from llama_stack.apis.safety.safety import ModerationObject from llama_stack.apis.shields import Shield @@ -52,7 +52,7 @@ class SafetyRouter(Safety): async def run_shield( self, shield_id: str, - messages: list[Message], + messages: list[OpenAIMessageParam], params: dict[str, Any] = None, ) -> RunShieldResponse: logger.debug(f"SafetyRouter.run_shield: {shield_id}") diff --git a/src/llama_stack/models/llama/llama3/generation.py b/src/llama_stack/models/llama/llama3/generation.py index fe7be5ea9..9ac215c3b 100644 --- a/src/llama_stack/models/llama/llama3/generation.py +++ b/src/llama_stack/models/llama/llama3/generation.py @@ -26,8 +26,10 @@ from fairscale.nn.model_parallel.initialize import ( ) from termcolor import cprint +from llama_stack.models.llama.datatypes import ToolPromptFormat + from ..checkpoint import maybe_reshard_state_dict -from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage, ToolPromptFormat +from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage from .args import ModelArgs from .chat_format import ChatFormat, LLMInput from .model import Transformer diff --git a/src/llama_stack/models/llama/llama3/interface.py b/src/llama_stack/models/llama/llama3/interface.py index b63ba4847..89be31a55 100644 --- a/src/llama_stack/models/llama/llama3/interface.py +++ b/src/llama_stack/models/llama/llama3/interface.py @@ -15,13 +15,10 @@ from pathlib import Path from termcolor import colored +from llama_stack.models.llama.datatypes import 
BuiltinTool, StopReason, ToolCall, ToolDefinition, ToolPromptFormat + from ..datatypes import ( - BuiltinTool, RawMessage, - StopReason, - ToolCall, - ToolDefinition, - ToolPromptFormat, ) from . import template_data from .chat_format import ChatFormat diff --git a/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py index 11a5993e9..3fbaa103e 100644 --- a/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +++ b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py @@ -15,7 +15,7 @@ import textwrap from datetime import datetime from typing import Any -from llama_stack.apis.inference import ( +from llama_stack.models.llama.datatypes import ( BuiltinTool, ToolDefinition, ) diff --git a/src/llama_stack/models/llama/llama3/tool_utils.py b/src/llama_stack/models/llama/llama3/tool_utils.py index 8c12fe680..6f919e1fa 100644 --- a/src/llama_stack/models/llama/llama3/tool_utils.py +++ b/src/llama_stack/models/llama/llama3/tool_utils.py @@ -8,8 +8,9 @@ import json import re from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import BuiltinTool, ToolCall, ToolPromptFormat -from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat +from ..datatypes import RecursiveType logger = get_logger(name=__name__, category="models::llama") diff --git a/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py index 1ee570933..feded9f8c 100644 --- a/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +++ b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py @@ -13,7 +13,7 @@ import textwrap -from llama_stack.apis.inference import ToolDefinition +from llama_stack.models.llama.datatypes import ToolDefinition from llama_stack.models.llama.llama3.prompt_templates.base import ( PromptTemplate, PromptTemplateGeneratorBase, diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py index cb926f529..51a2ddfad 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import math -from collections.abc import Generator from typing import Optional import torch @@ -14,21 +13,19 @@ from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerToken from llama_stack.apis.inference import ( GreedySamplingStrategy, JsonSchemaResponseFormat, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIResponseFormatJSONSchema, ResponseFormat, + ResponseFormatType, SamplingParams, TopPSamplingStrategy, ) -from llama_stack.models.llama.datatypes import QuantizationMode +from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer from llama_stack.models.llama.sku_types import Model, ModelFamily -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, - get_default_tool_prompt_format, -) from .common import model_checkpoint_dir from .config import MetaReferenceInferenceConfig @@ -106,14 +103,6 @@ def _infer_sampling_params(sampling_params: SamplingParams): return temperature, top_p -def _infer_tool_prompt_format(request: ChatCompletionRequestWithRawContent): - tool_config = request.tool_config - if tool_config is not None and tool_config.tool_prompt_format is not None: - return tool_config.tool_prompt_format - else: - return get_default_tool_prompt_format(request.model) - - class LlamaGenerator: def __init__( self, @@ -157,55 +146,56 @@ class LlamaGenerator: self.args = self.inner_generator.args self.formatter = self.inner_generator.formatter - def completion( - self, - request_batch: list[CompletionRequestWithRawContent], - ) -> Generator: - first_request = request_batch[0] - sampling_params = first_request.sampling_params or SamplingParams() - max_gen_len = sampling_params.max_tokens - if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len: - max_gen_len = self.args.max_seq_len - 1 - - temperature, top_p = _infer_sampling_params(sampling_params) - yield from self.inner_generator.generate( - llm_inputs=[self.formatter.encode_content(request.content) for request in request_batch], - max_gen_len=max_gen_len, - temperature=temperature, - top_p=top_p, - logprobs=bool(first_request.logprobs), - echo=False, - logits_processor=get_logits_processor( - self.tokenizer, - self.args.vocab_size, - first_request.response_format, - ), - ) - def chat_completion( self, - request_batch: list[ChatCompletionRequestWithRawContent], - ) -> Generator: - first_request = request_batch[0] - sampling_params = first_request.sampling_params or SamplingParams() + request: OpenAIChatCompletionRequestWithExtraBody, + raw_messages: list, + ): + """Generate chat completion using OpenAI request format. 
+
+        Args:
+            request: OpenAI chat completion request
+            raw_messages: Pre-converted list of RawMessage objects
+        """
+
+        # Determine tool prompt format (ToolPromptFormat.json applies in both the
+        # tool and no-tool cases, so no conditional is needed)
+        tool_prompt_format = ToolPromptFormat.json
+
+        # Prepare sampling params
+        sampling_params = SamplingParams()
+        if request.temperature is not None or request.top_p is not None:
+            sampling_params.strategy = TopPSamplingStrategy(
+                temperature=request.temperature if request.temperature is not None else 1.0,
+                top_p=request.top_p if request.top_p is not None else 1.0,
+            )
+        if request.max_tokens:
+            sampling_params.max_tokens = request.max_tokens
+
+        max_gen_len = sampling_params.max_tokens
         if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len:
             max_gen_len = self.args.max_seq_len - 1
 
         temperature, top_p = _infer_sampling_params(sampling_params)
+
+        # Get logits processor for response format
+        logits_processor = None
+        if request.response_format:
+            if isinstance(request.response_format, OpenAIResponseFormatJSONSchema):
+                # Extract the actual schema from OpenAIJSONSchema TypedDict
+                schema_dict = request.response_format.json_schema.get("schema") or {}
+                json_schema_format = JsonSchemaResponseFormat(
+                    type=ResponseFormatType.json_schema,
+                    json_schema=schema_dict,
+                )
+                logits_processor = get_logits_processor(self.tokenizer, self.args.vocab_size, json_schema_format)
+
+        # Generate
         yield from self.inner_generator.generate(
-            llm_inputs=[
-                self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request))
-                for request in request_batch
-            ],
+            llm_inputs=[self.formatter.encode_dialog_prompt(raw_messages, tool_prompt_format)],
             max_gen_len=max_gen_len,
             temperature=temperature,
             top_p=top_p,
-            logprobs=bool(first_request.logprobs),
+            logprobs=False,
             echo=False,
-            logits_processor=get_logits_processor(
-                self.tokenizer,
-                self.args.vocab_size,
-                first_request.response_format,
-            ),
+            logits_processor=logits_processor,
         )
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 76d3fdd50..ef21132a0 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -5,12 +5,19 @@ # the root directory of this source tree.
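+# A minimal usage sketch for the OpenAI-native path implemented below; it
+# mirrors the warm-up request issued during model load. `impl` and `model_id`
+# are assumed placeholders for a constructed provider and a registered model:
+#
+#     params = OpenAIChatCompletionRequestWithExtraBody(
+#         model=model_id,
+#         messages=[OpenAIUserMessageParam(role="user", content="Hi how are you?")],
+#         max_tokens=20,
+#     )
+#     response = await impl.openai_chat_completion(params=params)
+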
import asyncio +import time +import uuid from collections.abc import AsyncIterator from llama_stack.apis.inference import ( InferenceProvider, + OpenAIAssistantMessageParam, OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionUsage, + OpenAIChoice, OpenAICompletionRequestWithExtraBody, + OpenAIUserMessageParam, + ToolChoice, ) from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, @@ -19,12 +26,20 @@ from llama_stack.apis.inference.inference import ( ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat +from llama_stack.models.llama.llama3.prompt_templates import ( + JsonCustomToolGenerator, + SystemDefaultGenerator, +) from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, +) from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer from llama_stack.models.llama.sku_list import resolve_model -from llama_stack.models.llama.sku_types import ModelFamily +from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, @@ -44,6 +59,170 @@ log = get_logger(__name__, category="inference") SEMAPHORE = asyncio.Semaphore(1) +def _convert_openai_tool_to_tool_definition(tool) -> ToolDefinition: + """Convert OpenAI tool format to ToolDefinition format.""" + # OpenAI tools have function.name and function.parameters + return ToolDefinition( + tool_name=tool.function.name, + description=tool.function.description or "", + parameters=tool.function.parameters or {}, + ) + + +def _get_tool_choice_prompt(tool_choice, tools) -> str: + """Generate prompt text for tool_choice behavior.""" + if not tool_choice or tool_choice == ToolChoice.auto or tool_choice == "auto": + return "" + elif tool_choice == ToolChoice.required or tool_choice == "required": + return "You MUST use one of the provided functions/tools to answer the user query." + elif tool_choice == ToolChoice.none or tool_choice == "none": + return "" + else: + # Specific tool specified + return f"You MUST use the tool `{tool_choice}` to answer the user query." 
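+
+
+# Illustrative mapping for _get_tool_choice_prompt (assumed inputs; `tools` is
+# any non-empty OpenAI tool list and `get_weather` a hypothetical tool name):
+#
+#     _get_tool_choice_prompt("auto", tools)         -> ""
+#     _get_tool_choice_prompt("required", tools)     -> "You MUST use one of the provided functions/tools to answer the user query."
+#     _get_tool_choice_prompt("none", tools)         -> ""
+#     _get_tool_choice_prompt("get_weather", tools)  -> "You MUST use the tool `get_weather` to answer the user query."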
+ + +def _raw_content_as_str(content) -> str: + """Convert RawContent to string for system messages.""" + if isinstance(content, str): + return content + elif isinstance(content, RawTextItem): + return content.text + elif isinstance(content, list): + return "\n".join(_raw_content_as_str(c) for c in content) + else: + return "" + + +def _augment_raw_messages_for_tools_llama_3_1( + raw_messages: list[RawMessage], + tools: list, + tool_choice, +) -> list[RawMessage]: + """Augment raw messages with tool definitions for Llama 3.1 style models.""" + messages = raw_messages.copy() + existing_system_message = None + if messages and messages[0].role == "system": + existing_system_message = messages.pop(0) + + sys_content = "" + + # Add tool definitions first (if present) + if tools: + # Convert OpenAI tools to ToolDefinitions + tool_definitions = [_convert_openai_tool_to_tool_definition(t) for t in tools] + + # For OpenAI format, all tools are custom (have string names) + tool_gen = JsonCustomToolGenerator() + tool_template = tool_gen.gen(tool_definitions) + sys_content += tool_template.render() + sys_content += "\n" + + # Add default system prompt + default_gen = SystemDefaultGenerator() + default_template = default_gen.gen() + sys_content += default_template.render() + + # Add existing system message if present + if existing_system_message: + sys_content += "\n" + _raw_content_as_str(existing_system_message.content) + + # Add tool choice prompt if needed + if tool_choice_prompt := _get_tool_choice_prompt(tool_choice, tools): + sys_content += "\n" + tool_choice_prompt + + # Create new system message + new_system_message = RawMessage( + role="system", + content=[RawTextItem(text=sys_content.strip())], + ) + + return [new_system_message] + messages + + +def _augment_raw_messages_for_tools_llama_4( + raw_messages: list[RawMessage], + tools: list, + tool_choice, +) -> list[RawMessage]: + """Augment raw messages with tool definitions for Llama 4/3.2/3.3 style models.""" + messages = raw_messages.copy() + existing_system_message = None + if messages and messages[0].role == "system": + existing_system_message = messages.pop(0) + + sys_content = "" + + # Add tool definitions if present + if tools: + # Convert OpenAI tools to ToolDefinitions + tool_definitions = [_convert_openai_tool_to_tool_definition(t) for t in tools] + + # Use python_list format for Llama 4 + tool_gen = PythonListCustomToolGeneratorLlama4() + system_prompt = None + if existing_system_message: + system_prompt = _raw_content_as_str(existing_system_message.content) + + tool_template = tool_gen.gen(tool_definitions, system_prompt) + sys_content = tool_template.render() + elif existing_system_message: + # No tools, just use existing system message + sys_content = _raw_content_as_str(existing_system_message.content) + + # Add tool choice prompt if needed + if tool_choice_prompt := _get_tool_choice_prompt(tool_choice, tools): + sys_content += "\n" + tool_choice_prompt + + if sys_content: + new_system_message = RawMessage( + role="system", + content=[RawTextItem(text=sys_content.strip())], + ) + return [new_system_message] + messages + + return messages + + +def augment_raw_messages_for_tools( + raw_messages: list[RawMessage], + params: OpenAIChatCompletionRequestWithExtraBody, + llama_model, +) -> list[RawMessage]: + """Augment raw messages with tool definitions based on model family.""" + if not params.tools: + return raw_messages + + # Determine augmentation strategy based on model family + if llama_model.model_family == 
ModelFamily.llama3_1 or ( + llama_model.model_family == ModelFamily.llama3_2 and is_multimodal(llama_model.core_model_id) + ): + # Llama 3.1 and Llama 3.2 multimodal use JSON format + return _augment_raw_messages_for_tools_llama_3_1( + raw_messages, + params.tools, + params.tool_choice, + ) + elif llama_model.model_family in ( + ModelFamily.llama3_2, + ModelFamily.llama3_3, + ModelFamily.llama4, + ): + # Llama 3.2/3.3/4 use python_list format + return _augment_raw_messages_for_tools_llama_4( + raw_messages, + params.tools, + params.tool_choice, + ) + else: + # Default to Llama 3.1 style + return _augment_raw_messages_for_tools_llama_3_1( + raw_messages, + params.tools, + params.tool_choice, + ) + + def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_model: Model) -> LlamaGenerator: return LlamaGenerator(config, model_id, llama_model) @@ -136,10 +315,13 @@ class MetaReferenceInferenceImpl( self.llama_model = llama_model log.info("Warming up...") + await self.openai_chat_completion( - model=model_id, - messages=[{"role": "user", "content": "Hi how are you?"}], - max_tokens=20, + params=OpenAIChatCompletionRequestWithExtraBody( + model=model_id, + messages=[OpenAIUserMessageParam(role="user", content="Hi how are you?")], + max_tokens=20, + ) ) log.info("Warmed up!") @@ -155,4 +337,207 @@ class MetaReferenceInferenceImpl( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider") + self.check_model(params) + + # Convert OpenAI messages to RawMessages + from llama_stack.models.llama.datatypes import StopReason + from llama_stack.providers.utils.inference.prompt_adapter import ( + convert_openai_message_to_raw_message, + decode_assistant_message, + ) + + raw_messages = [await convert_openai_message_to_raw_message(msg) for msg in params.messages] + + # Augment messages with tool definitions if tools are present + raw_messages = augment_raw_messages_for_tools(raw_messages, params, self.llama_model) + + # Call generator's chat_completion method (works for both single-GPU and model-parallel) + if isinstance(self.generator, LlamaGenerator): + generator = self.generator.chat_completion(params, raw_messages) + else: + # Model parallel: submit task to process group + generator = self.generator.group.run_inference(("chat_completion", [params, raw_messages])) + + # Check if streaming is requested + if params.stream: + return self._stream_chat_completion(generator, params) + + # Non-streaming: collect all generated text + generated_text = "" + for result_batch in generator: + for result in result_batch: + if not result.ignore_token and result.source == "output": + generated_text += result.text + + # Decode assistant message to extract tool calls and determine stop_reason + # Default to end_of_turn if generation completed normally + decoded_message = decode_assistant_message(generated_text, StopReason.end_of_turn) + + # Convert tool calls to OpenAI format + openai_tool_calls = None + if decoded_message.tool_calls: + from llama_stack.apis.inference import ( + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + ) + + openai_tool_calls = [ + OpenAIChatCompletionToolCall( + # generate a uuid for the call id. This is the only inline provider that does this, so need to get creative. 
+                    id=f"call_{uuid.uuid4().hex[:24]}",
+                    type="function",
+                    function=OpenAIChatCompletionToolCallFunction(
+                        name=str(tc.tool_name),
+                        arguments=tc.arguments,
+                    ),
+                )
+                for tc in decoded_message.tool_calls
+            ]
+
+        # Determine finish_reason based on whether tool calls are present
+        finish_reason = "tool_calls" if openai_tool_calls else "stop"
+
+        # Extract content from decoded message
+        content = ""
+        if isinstance(decoded_message.content, str):
+            content = decoded_message.content
+        elif isinstance(decoded_message.content, list):
+            for item in decoded_message.content:
+                if isinstance(item, RawTextItem):
+                    content += item.text
+
+        # Create OpenAI response
+        # generate a uuid for the response id. This is the only inline provider that does this, so need to get creative.
+        response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+
+        return OpenAIChatCompletion(
+            id=response_id,
+            object="chat.completion",
+            created=created,
+            model=params.model,
+            choices=[
+                OpenAIChoice(
+                    index=0,
+                    message=OpenAIAssistantMessageParam(
+                        role="assistant",
+                        content=content,
+                        tool_calls=openai_tool_calls,
+                    ),
+                    finish_reason=finish_reason,
+                    logprobs=None,
+                )
+            ],
+            usage=OpenAIChatCompletionUsage(
+                prompt_tokens=0,  # TODO: calculate properly
+                completion_tokens=0,  # TODO: calculate properly
+                total_tokens=0,  # TODO: calculate properly
+            ),
+        )
+
+    async def _stream_chat_completion(
+        self,
+        generator,
+        params: OpenAIChatCompletionRequestWithExtraBody,
+    ) -> AsyncIterator[OpenAIChatCompletionChunk]:
+        """Stream chat completion chunks as they're generated."""
+        from llama_stack.apis.inference import (
+            OpenAIChatCompletionChunk,
+            OpenAIChatCompletionToolCall,
+            OpenAIChatCompletionToolCallFunction,
+            OpenAIChoiceDelta,
+            OpenAIChunkChoice,
+        )
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
+
+        response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+        generated_text = ""
+
+        # Yield chunks as tokens are generated
+        for result_batch in generator:
+            for result in result_batch:
+                if result.ignore_token or result.source != "output":
+                    continue
+
+                generated_text += result.text
+
+                # Yield delta chunk with the new text
+                chunk = OpenAIChatCompletionChunk(
+                    id=response_id,
+                    object="chat.completion.chunk",
+                    created=created,
+                    model=params.model,
+                    choices=[
+                        OpenAIChunkChoice(
+                            index=0,
+                            delta=OpenAIChoiceDelta(
+                                role="assistant",
+                                content=result.text,
+                            ),
+                            finish_reason="",
+                            logprobs=None,
+                        )
+                    ],
+                )
+                yield chunk
+
+        # After generation completes, decode the full message to extract tool calls
+        decoded_message = decode_assistant_message(generated_text, StopReason.end_of_turn)
+
+        # If tool calls are present, yield a final chunk with tool_calls
+        if decoded_message.tool_calls:
+            openai_tool_calls = [
+                OpenAIChatCompletionToolCall(
+                    # generate a uuid for the call id. This is the only inline provider that does this, so need to get creative.
+ id=f"call_{uuid.uuid4().hex[:24]}", + type="function", + function=OpenAIChatCompletionToolCallFunction( + name=str(tc.tool_name), + arguments=tc.arguments, + ), + ) + for tc in decoded_message.tool_calls + ] + + # Yield chunk with tool_calls + chunk = OpenAIChatCompletionChunk( + id=response_id, + object="chat.completion.chunk", + created=created, + model=params.model, + choices=[ + OpenAIChunkChoice( + index=0, + delta=OpenAIChoiceDelta( + role="assistant", + tool_calls=openai_tool_calls, + ), + finish_reason="", + logprobs=None, + ) + ], + ) + yield chunk + + finish_reason = "tool_calls" + else: + finish_reason = "stop" + + # Yield final chunk with finish_reason + final_chunk = OpenAIChatCompletionChunk( + id=response_id, + object="chat.completion.chunk", + created=created, + model=params.model, + choices=[ + OpenAIChunkChoice( + index=0, + delta=OpenAIChoiceDelta(), + finish_reason=finish_reason, + logprobs=None, + ) + ], + ) + yield final_chunk diff --git a/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py index 9d0295d65..f50b41f34 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py @@ -4,17 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections.abc import Callable, Generator -from copy import deepcopy +from collections.abc import Callable from functools import partial from typing import Any from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, -) from .parallel_utils import ModelParallelProcessGroup @@ -23,12 +18,14 @@ class ModelRunner: def __init__(self, llama): self.llama = llama - # the `task` object is the same that is sent to `ModelParallelProcessGroup.run_inference()` def __call__(self, task: Any): - if task[0] == "chat_completion": - return self.llama.chat_completion(task[1]) + task_type = task[0] + if task_type == "chat_completion": + # task[1] is [params, raw_messages] + params, raw_messages = task[1] + return self.llama.chat_completion(params, raw_messages) else: - raise ValueError(f"Unexpected task type {task[0]}") + raise ValueError(f"Unexpected task type {task_type}") def init_model_cb( @@ -78,19 +75,3 @@ class LlamaModelParallelGenerator: def __exit__(self, exc_type, exc_value, exc_traceback): self.group.stop() - - def completion( - self, - request_batch: list[CompletionRequestWithRawContent], - ) -> Generator: - req_obj = deepcopy(request_batch) - gen = self.group.run_inference(("completion", req_obj)) - yield from gen - - def chat_completion( - self, - request_batch: list[ChatCompletionRequestWithRawContent], - ) -> Generator: - req_obj = deepcopy(request_batch) - gen = self.group.run_inference(("chat_completion", req_obj)) - yield from gen diff --git a/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index bb6a1bd03..663e4793b 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ 
b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -33,10 +33,6 @@ from torch.distributed.launcher.api import LaunchConfig, elastic_launch from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import GenerationResult -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, -) log = get_logger(name=__name__, category="inference") @@ -69,10 +65,7 @@ class CancelSentinel(BaseModel): class TaskRequest(BaseModel): type: Literal[ProcessingMessageName.task_request] = ProcessingMessageName.task_request - task: tuple[ - str, - list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], - ] + task: tuple[str, list] class TaskResponse(BaseModel): @@ -328,10 +321,7 @@ class ModelParallelProcessGroup: def run_inference( self, - req: tuple[ - str, - list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], - ], + req: tuple[str, list], ) -> Generator: assert not self.running, "inference already running" diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index cb72aa13a..e6dcf3ae7 100644 --- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -22,9 +22,6 @@ from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) -from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionToLlamaStackMixin, -) from .config import SentenceTransformersInferenceConfig @@ -32,7 +29,6 @@ log = get_logger(name=__name__, category="inference") class SentenceTransformersInferenceImpl( - OpenAIChatCompletionToLlamaStackMixin, SentenceTransformerEmbeddingMixin, InferenceProvider, ModelsProtocolPrivate, diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 223497fb8..a793c499e 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -11,9 +11,7 @@ from collections.abc import AsyncIterator import litellm from llama_stack.apis.inference import ( - ChatCompletionRequest, InferenceProvider, - JsonSchemaResponseFormat, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -23,15 +21,11 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, - ToolChoice, ) from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry from llama_stack.providers.utils.inference.openai_compat import ( - convert_message_to_openai_dict_new, - convert_tooldef_to_openai_tool, - get_sampling_options, prepare_openai_completion_params, ) @@ -127,51 +121,6 @@ class LiteLLMOpenAIMixin( return schema - async def _get_params(self, request: ChatCompletionRequest) -> dict: - from typing import Any - - input_dict: dict[str, Any] = {} - - input_dict["messages"] = [ - await convert_message_to_openai_dict_new(m, 
download_images=self.download_images) for m in request.messages - ] - if fmt := request.response_format: - if not isinstance(fmt, JsonSchemaResponseFormat): - raise ValueError( - f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported." - ) - - # Convert to dict for manipulation - fmt_dict = dict(fmt.json_schema) - name = fmt_dict["title"] - del fmt_dict["title"] - fmt_dict["additionalProperties"] = False - - # Apply additionalProperties: False recursively to all objects - fmt_dict = self._add_additional_properties_recursive(fmt_dict) - - input_dict["response_format"] = { - "type": "json_schema", - "json_schema": { - "name": name, - "schema": fmt_dict, - "strict": self.json_schema_strict, - }, - } - if request.tools: - input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools] - if request.tool_config and (tool_choice := request.tool_config.tool_choice): - input_dict["tool_choice"] = tool_choice.value if isinstance(tool_choice, ToolChoice) else tool_choice - - return { - "model": request.model, - "api_key": self.get_api_key(), - "api_base": self.api_base, - **input_dict, - "stream": request.stream, - **get_sampling_options(request.sampling_params), - } - def get_api_key(self) -> str: provider_data = self.get_request_provider_data() key_field = self.provider_data_api_key_field diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index aabcb50f8..c2e6829e0 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -3,31 +3,14 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json -import time -import uuid -import warnings -from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Iterable +from collections.abc import Iterable from typing import ( Any, ) -from openai import AsyncStream -from openai.types.chat import ( - ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage, -) -from openai.types.chat import ( - ChatCompletionChunk as OpenAIChatCompletionChunk, -) -from openai.types.chat import ( - ChatCompletionContentPartImageParam as OpenAIChatCompletionContentPartImageParam, -) from openai.types.chat import ( ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam, ) -from openai.types.chat import ( - ChatCompletionContentPartTextParam as OpenAIChatCompletionContentPartTextParam, -) try: from openai.types.chat import ( @@ -37,84 +20,24 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) -from openai.types.chat import ( - ChatCompletionMessageParam as OpenAIChatCompletionMessage, -) from openai.types.chat import ( ChatCompletionMessageToolCall, ) -from openai.types.chat import ( - ChatCompletionSystemMessageParam as OpenAIChatCompletionSystemMessage, -) -from openai.types.chat import ( - ChatCompletionToolMessageParam as OpenAIChatCompletionToolMessage, -) -from openai.types.chat import ( - ChatCompletionUserMessageParam as OpenAIChatCompletionUserMessage, -) -from openai.types.chat.chat_completion import ( - Choice as OpenAIChoice, -) -from openai.types.chat.chat_completion import ( - ChoiceLogprobs as OpenAIChoiceLogprobs, # same as chat_completion_chunk ChoiceLogprobs -) -from openai.types.chat.chat_completion_chunk import ( - Choice as OpenAIChatCompletionChunkChoice, -) -from openai.types.chat.chat_completion_chunk import ( - ChoiceDelta as OpenAIChoiceDelta, -) -from openai.types.chat.chat_completion_chunk import ( - ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall, -) -from openai.types.chat.chat_completion_chunk import ( - ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction, -) -from openai.types.chat.chat_completion_content_part_image_param import ( - ImageURL as OpenAIImageURL, -) -from openai.types.chat.chat_completion_message_tool_call import ( - Function as OpenAIFunction, -) from pydantic import BaseModel from llama_stack.apis.common.content_types import ( URL, ImageContentItem, - InterleavedContent, TextContentItem, - TextDelta, - ToolCallDelta, - ToolCallParseStatus, _URLOrData, ) from llama_stack.apis.inference import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseEvent, - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, - CompletionMessage, - CompletionResponse, - CompletionResponseStreamChunk, GreedySamplingStrategy, JsonSchemaResponseFormat, - Message, - OpenAIChatCompletion, - OpenAIMessageParam, OpenAIResponseFormatParam, SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolConfig, - ToolResponseMessage, TopKSamplingStrategy, TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference import ( - OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( @@ -123,10 +46,6 @@ from llama_stack.models.llama.datatypes import ( ToolCall, ToolDefinition, ) -from llama_stack.providers.utils.inference.prompt_adapter import ( - convert_image_content_to_url, - decode_assistant_message, -) logger = 
get_logger(name=__name__, category="providers::utils") @@ -213,345 +132,6 @@ def get_stop_reason(finish_reason: str) -> StopReason: return StopReason.out_of_tokens -def convert_openai_completion_logprobs( - logprobs: OpenAICompatLogprobs | None, -) -> list[TokenLogProbs] | None: - if not logprobs: - return None - if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs: - return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs] - - # Together supports logprobs with top_k=1 only. This means for each token position, - # they return only the logprobs for the selected token (vs. the top n most likely tokens). - # Here we construct the response by matching the selected token with the logprobs. - if logprobs.tokens and logprobs.token_logprobs: - return [ - TokenLogProbs(logprobs_by_token={token: token_lp}) - for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs, strict=False) - ] - return None - - -def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenAICompatLogprobs | None): - if logprobs is None: - return None - if isinstance(logprobs, float): - # Adapt response from Together CompletionChoicesChunk - return [TokenLogProbs(logprobs_by_token={text: logprobs})] - if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs: - return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs] - return None - - -def process_completion_response( - response: OpenAICompatCompletionResponse, -) -> CompletionResponse: - choice = response.choices[0] - text = choice.text or "" - # drop suffix if present and return stop reason as end of turn - if text.endswith("<|eot_id|>"): - return CompletionResponse( - stop_reason=StopReason.end_of_turn, - content=text[: -len("<|eot_id|>")], - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - # drop suffix if present and return stop reason as end of message - if text.endswith("<|eom_id|>"): - return CompletionResponse( - stop_reason=StopReason.end_of_message, - content=text[: -len("<|eom_id|>")], - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - return CompletionResponse( - stop_reason=get_stop_reason(choice.finish_reason or "stop"), - content=text, - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - - -def process_chat_completion_response( - response: OpenAICompatCompletionResponse, - request: ChatCompletionRequest, -) -> ChatCompletionResponse: - choice = response.choices[0] - if choice.finish_reason == "tool_calls": - if not hasattr(choice, "message") or not choice.message or not choice.message.tool_calls: # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed - raise ValueError("Tool calls are not present in the response") - - tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls] # type: ignore[attr-defined] # OpenAICompatCompletionChoice is runtime duck-typed - if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls): - # If we couldn't parse a tool call, jsonify the tool calls and return them - return ChatCompletionResponse( - completion_message=CompletionMessage( - stop_reason=StopReason.end_of_turn, - content=json.dumps(tool_calls, default=lambda x: x.model_dump()), - ), - logprobs=None, - ) - else: - # Otherwise, return tool calls as normal - # Filter to only valid ToolCall objects - valid_tool_calls = [tc for tc in tool_calls if isinstance(tc, ToolCall)] - return ChatCompletionResponse( - completion_message=CompletionMessage( - 
tool_calls=valid_tool_calls, - stop_reason=StopReason.end_of_turn, - # Content is not optional - content="", - ), - logprobs=None, - ) - - # TODO: This does not work well with tool calls for vLLM remote provider - # Ref: https://github.com/meta-llama/llama-stack/issues/1058 - raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason or "stop")) - - # NOTE: If we do not set tools in chat-completion request, we should not - # expect the ToolCall in the response. Instead, we should return the raw - # response from the model. - if raw_message.tool_calls: - if not request.tools: - raw_message.tool_calls = [] - raw_message.content = text_from_choice(choice) - else: - # only return tool_calls if provided in the request - new_tool_calls = [] - request_tools = {t.tool_name: t for t in request.tools} - for t in raw_message.tool_calls: - if t.tool_name in request_tools: - new_tool_calls.append(t) - else: - logger.warning(f"Tool {t.tool_name} not found in request tools") - - if len(new_tool_calls) < len(raw_message.tool_calls): - raw_message.tool_calls = new_tool_calls - raw_message.content = text_from_choice(choice) - - return ChatCompletionResponse( - completion_message=CompletionMessage( - content=raw_message.content, # type: ignore[arg-type] # decode_assistant_message returns Union[str, InterleavedContent] - stop_reason=raw_message.stop_reason or StopReason.end_of_turn, - tool_calls=raw_message.tool_calls, - ), - logprobs=None, - ) - - -async def process_completion_stream_response( - stream: AsyncGenerator[OpenAICompatCompletionResponse, None], -) -> AsyncGenerator[CompletionResponseStreamChunk, None]: - stop_reason = None - - async for chunk in stream: - choice = chunk.choices[0] - finish_reason = choice.finish_reason - - text = text_from_choice(choice) - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - yield CompletionResponseStreamChunk( - delta=text, - stop_reason=stop_reason, - logprobs=convert_openai_completion_logprobs_stream(text, choice.logprobs), - ) - if finish_reason: - if finish_reason in ["stop", "eos", "eos_token"]: - stop_reason = StopReason.end_of_turn - elif finish_reason == "length": - stop_reason = StopReason.out_of_tokens - break - - yield CompletionResponseStreamChunk( - delta="", - stop_reason=stop_reason, - ) - - -async def process_chat_completion_stream_response( - stream: AsyncGenerator[OpenAICompatCompletionResponse, None], - request: ChatCompletionRequest, -) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None]: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta=TextDelta(text=""), - ) - ) - - buffer = "" - ipython = False - stop_reason = None - - async for chunk in stream: - choice = chunk.choices[0] - finish_reason = choice.finish_reason - - if finish_reason: - if stop_reason is None and finish_reason in ["stop", "eos", "eos_token"]: - stop_reason = StopReason.end_of_turn - elif stop_reason is None and finish_reason == "length": - stop_reason = StopReason.out_of_tokens - break - - text = text_from_choice(choice) - if not text: - # Sometimes you get empty chunks from providers - continue - - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - 
event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - if ipython: - buffer += text - delta = ToolCallDelta( - tool_call=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=TextDelta(text=text), - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = decode_assistant_message(buffer, stop_reason or StopReason.end_of_turn) - - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call="", - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - request_tools = {t.tool_name: t for t in (request.tools or [])} - for tool_call in message.tool_calls: - if tool_call.tool_name in request_tools: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=tool_call, - parse_status=ToolCallParseStatus.succeeded, - ), - stop_reason=stop_reason, - ) - ) - else: - logger.warning(f"Tool {tool_call.tool_name} not found in request tools") - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - # Parsing tool call failed due to tool call not being found in request tools, - # We still add the raw message text inside tool_call for responding back to the user - tool_call=buffer, - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=""), - stop_reason=stop_reason, - ) - ) - - -async def convert_message_to_openai_dict(message: Message, download: bool = False) -> dict: - async def _convert_content(content) -> dict: - if isinstance(content, ImageContentItem): - return { - "type": "image_url", - "image_url": { - "url": await convert_image_content_to_url(content, download=download), - }, - } - else: - text = content.text if isinstance(content, TextContentItem) else content - assert isinstance(text, str) - return {"type": "text", "text": text} - - if isinstance(message.content, list): - content = [await _convert_content(c) for c in message.content] - else: - content = [await _convert_content(message.content)] - - result = { - "role": message.role, - "content": content, - } - - if hasattr(message, "tool_calls") and message.tool_calls: - tool_calls_list = [] - for tc in message.tool_calls: - # The tool.tool_name can be a str or a BuiltinTool enum. If - # it's the latter, convert to a string. 
- tool_name = tc.tool_name - if isinstance(tool_name, BuiltinTool): - tool_name = tool_name.value - - tool_calls_list.append( - { - "id": tc.call_id, - "type": "function", - "function": { - "name": tool_name, - "arguments": tc.arguments, - }, - } - ) - result["tool_calls"] = tool_calls_list # type: ignore[assignment] # dict allows Any value, stricter type expected - return result - - class UnparseableToolCall(BaseModel): """ A ToolCall with arguments that are not valid JSON. @@ -563,112 +143,6 @@ class UnparseableToolCall(BaseModel): arguments: str = "" -async def convert_message_to_openai_dict_new( - message: Message | dict, - download_images: bool = False, -) -> OpenAIChatCompletionMessage: - """ - Convert a Message to an OpenAI API-compatible dictionary. - """ - # users can supply a dict instead of a Message object, we'll - # convert it to a Message object and proceed with some type safety. - if isinstance(message, dict): - if "role" not in message: - raise ValueError("role is required in message") - if message["role"] == "user": - message = UserMessage(**message) - elif message["role"] == "assistant": - message = CompletionMessage(**message) - elif message["role"] == "tool": - message = ToolResponseMessage(**message) - elif message["role"] == "system": - message = SystemMessage(**message) - else: - raise ValueError(f"Unsupported message role: {message['role']}") - - # Map Llama Stack spec to OpenAI spec - - # str -> str - # {"type": "text", "text": ...} -> {"type": "text", "text": ...} - # {"type": "image", "image": {"url": {"uri": ...}}} -> {"type": "image_url", "image_url": {"url": ...}} - # {"type": "image", "image": {"data": ...}} -> {"type": "image_url", "image_url": {"url": "data:image/?;base64,..."}} - # List[...] -> List[...] - async def _convert_message_content( - content: InterleavedContent, - ) -> str | Iterable[OpenAIChatCompletionContentPartParam]: - async def impl( - content_: InterleavedContent, - ) -> str | OpenAIChatCompletionContentPartParam | list[OpenAIChatCompletionContentPartParam]: - # Llama Stack and OpenAI spec match for str and text input - if isinstance(content_, str): - return content_ - elif isinstance(content_, TextContentItem): - return OpenAIChatCompletionContentPartTextParam( - type="text", - text=content_.text, - ) - elif isinstance(content_, ImageContentItem): - return OpenAIChatCompletionContentPartImageParam( - type="image_url", - image_url=OpenAIImageURL( - url=await convert_image_content_to_url(content_, download=download_images) - ), - ) - elif isinstance(content_, list): - return [await impl(item) for item in content_] # type: ignore[misc] # recursive list comprehension confuses mypy's type narrowing - else: - raise ValueError(f"Unsupported content type: {type(content_)}") - - ret = await impl(content) - - # OpenAI*Message expects a str or list - if isinstance(ret, str) or isinstance(ret, list): - return ret - else: - return [ret] - - out: OpenAIChatCompletionMessage - if isinstance(message, UserMessage): - out = OpenAIChatCompletionUserMessage( - role="user", - content=await _convert_message_content(message.content), - ) - elif isinstance(message, CompletionMessage): - tool_calls = [ - OpenAIChatCompletionMessageFunctionToolCall( - id=tool.call_id, - function=OpenAIFunction( - name=(tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value), - arguments=tool.arguments, # Already a JSON string, don't double-encode - ), - type="function", - ) - for tool in (message.tool_calls or []) - ] - params = {} - if tool_calls: 
- params["tool_calls"] = tool_calls - out = OpenAIChatCompletionAssistantMessage( - role="assistant", - content=await _convert_message_content(message.content), - **params, # type: ignore[typeddict-item] # tool_calls dict expansion conflicts with TypedDict optional field - ) - elif isinstance(message, ToolResponseMessage): - out = OpenAIChatCompletionToolMessage( - role="tool", - tool_call_id=message.call_id, - content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement - ) - elif isinstance(message, SystemMessage): - out = OpenAIChatCompletionSystemMessage( - role="system", - content=await _convert_message_content(message.content), # type: ignore[typeddict-item] # content union type incompatible with TypedDict str requirement - ) - else: - raise ValueError(f"Unsupported message type: {type(message)}") - - return out - - def convert_tool_call( tool_call: ChatCompletionMessageToolCall, ) -> ToolCall | UnparseableToolCall: @@ -817,17 +291,6 @@ def _convert_openai_finish_reason(finish_reason: str) -> StopReason: }.get(finish_reason, StopReason.end_of_turn) -def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None = None) -> ToolConfig: - tool_config = ToolConfig() - if tool_choice: - try: - tool_choice = ToolChoice(tool_choice) # type: ignore[assignment] # reassigning to enum narrows union but mypy can't track after exception - except ValueError: - pass - tool_config.tool_choice = tool_choice # type: ignore[assignment] # ToolConfig.tool_choice accepts Union[ToolChoice, dict] but mypy tracks narrower type - return tool_config - - def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]: lls_tools: list[ToolDefinition] = [] if not tools: @@ -898,40 +361,6 @@ def _convert_openai_tool_calls( ] -def _convert_openai_logprobs( - logprobs: OpenAIChoiceLogprobs, -) -> list[TokenLogProbs] | None: - """ - Convert an OpenAI ChoiceLogprobs into a list of TokenLogProbs. - - OpenAI ChoiceLogprobs: - content: Optional[List[ChatCompletionTokenLogprob]] - - OpenAI ChatCompletionTokenLogprob: - token: str - logprob: float - top_logprobs: List[TopLogprob] - - OpenAI TopLogprob: - token: str - logprob: float - - -> - - TokenLogProbs: - logprobs_by_token: Dict[str, float] - - token, logprob - - """ - if not logprobs or not logprobs.content: - return None - - return [ - TokenLogProbs(logprobs_by_token={logprobs.token: logprobs.logprob for logprobs in content.top_logprobs}) - for content in logprobs.content - ] - - def _convert_openai_sampling_params( max_tokens: int | None = None, temperature: float | None = None, @@ -956,37 +385,6 @@ def _convert_openai_sampling_params( return sampling_params -def openai_messages_to_messages( - messages: list[OpenAIMessageParam], -) -> list[Message]: - """ - Convert a list of OpenAIChatCompletionMessage into a list of Message. 
- """ - converted_messages: list[Message] = [] - for message in messages: - converted_message: Message - if message.role == "system": - converted_message = SystemMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types - elif message.role == "user": - converted_message = UserMessage(content=openai_content_to_content(message.content)) # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types - elif message.role == "assistant": - converted_message = CompletionMessage( - content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types - tool_calls=_convert_openai_tool_calls(message.tool_calls) if message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls type incompatible with conversion function - stop_reason=StopReason.end_of_turn, - ) - elif message.role == "tool": - converted_message = ToolResponseMessage( - role="tool", - call_id=message.tool_call_id, - content=openai_content_to_content(message.content), # type: ignore[arg-type] # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types - ) - else: - raise ValueError(f"Unknown role {message.role}") - converted_messages.append(converted_message) - return converted_messages - - def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None): if content is None: return "" @@ -1005,216 +403,6 @@ def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionConten raise ValueError(f"Unknown content type: {content}") -def convert_openai_chat_completion_choice( - choice: OpenAIChoice, -) -> ChatCompletionResponse: - """ - Convert an OpenAI Choice into a ChatCompletionResponse. 
- - OpenAI Choice: - message: ChatCompletionMessage - finish_reason: str - logprobs: Optional[ChoiceLogprobs] - - OpenAI ChatCompletionMessage: - role: Literal["assistant"] - content: Optional[str] - tool_calls: Optional[List[ChatCompletionMessageToolCall]] - - -> - - ChatCompletionResponse: - completion_message: CompletionMessage - logprobs: Optional[List[TokenLogProbs]] - - CompletionMessage: - role: Literal["assistant"] - content: str | ImageMedia | List[str | ImageMedia] - stop_reason: StopReason - tool_calls: List[ToolCall] - - class StopReason(Enum): - end_of_turn = "end_of_turn" - end_of_message = "end_of_message" - out_of_tokens = "out_of_tokens" - """ - assert hasattr(choice, "message") and choice.message, "error in server response: message not found" - assert hasattr(choice, "finish_reason") and choice.finish_reason, ( - "error in server response: finish_reason not found" - ) - - return ChatCompletionResponse( - completion_message=CompletionMessage( - content=choice.message.content or "", # CompletionMessage content is not optional - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - tool_calls=_convert_openai_tool_calls(choice.message.tool_calls) if choice.message.tool_calls else [], # type: ignore[arg-type] # OpenAI tool_calls Optional type broadens union - ), - logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)), # type: ignore[arg-type] # getattr returns Any, can't narrow without inspection - ) - - -async def convert_openai_chat_completion_stream( - stream: AsyncStream[OpenAIChatCompletionChunk], - enable_incremental_tool_calls: bool, -) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None]: - """ - Convert a stream of OpenAI chat completion chunks into a stream - of ChatCompletionResponseStreamChunk. - """ - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta=TextDelta(text=""), - ) - ) - event_type = ChatCompletionResponseEventType.progress - - stop_reason = None - tool_call_idx_to_buffer = {} - - async for chunk in stream: - choice = chunk.choices[0] # assuming only one choice per chunk - - # we assume there's only one finish_reason in the stream - stop_reason = _convert_openai_finish_reason(choice.finish_reason) if choice.finish_reason else stop_reason - logprobs = getattr(choice, "logprobs", None) - - # if there's a tool call, emit an event for each tool in the list - # if tool call and content, emit both separately - if choice.delta.tool_calls: - # the call may have content and a tool call. 
ChatCompletionResponseEvent - # does not support both, so we emit the content first - if choice.delta.content: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=TextDelta(text=choice.delta.content), - logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result - ) - ) - - # it is possible to have parallel tool calls in stream, but - # ChatCompletionResponseEvent only supports one per stream - if len(choice.delta.tool_calls) > 1: - warnings.warn( - "multiple tool calls found in a single delta, using the first, ignoring the rest", - stacklevel=2, - ) - - if not enable_incremental_tool_calls: - for tool_call in choice.delta.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=_convert_openai_tool_calls([tool_call])[0], # type: ignore[arg-type, list-item] # delta tool_call type differs from complete tool_call - parse_status=ToolCallParseStatus.succeeded, - ), - logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result - ) - ) - else: - for tool_call in choice.delta.tool_calls: - idx = tool_call.index if hasattr(tool_call, "index") else 0 - - if idx not in tool_call_idx_to_buffer: - tool_call_idx_to_buffer[idx] = { - "call_id": tool_call.id, - "name": None, - "arguments": "", - "content": "", - } - - buffer = tool_call_idx_to_buffer[idx] - - if tool_call.function: - if tool_call.function.name: - buffer["name"] = tool_call.function.name - delta = f"{buffer['name']}(" - if buffer["content"] is not None: - buffer["content"] += delta - - if tool_call.function.arguments: - delta = tool_call.function.arguments - if buffer["arguments"] is not None and delta: - buffer["arguments"] += delta - if buffer["content"] is not None and delta: - buffer["content"] += delta - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result - ) - ) - elif choice.delta.content: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=TextDelta(text=choice.delta.content or ""), - logprobs=_convert_openai_logprobs(logprobs), # type: ignore[arg-type] # logprobs type broadened from getattr result - ) - ) - - for idx, buffer in tool_call_idx_to_buffer.items(): - logger.debug(f"toolcall_buffer[{idx}]: {buffer}") - if buffer["name"]: - delta = ")" - if buffer["content"] is not None: - buffer["content"] += delta - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=None, - ) - ) - - try: - parsed_tool_call = ToolCall( - call_id=buffer["call_id"] or "", - tool_name=buffer["name"] or "", - arguments=buffer["arguments"] or "", - ) - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=parsed_tool_call, # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall] - parse_status=ToolCallParseStatus.succeeded, - ), - stop_reason=stop_reason, - ) - ) - except 
json.JSONDecodeError as e: - print(f"Failed to parse arguments: {e}") - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=buffer["content"], # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall] - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=""), - stop_reason=stop_reason, - ) - ) - - async def prepare_openai_completion_params(**params): async def _prepare_value(value: Any) -> Any: new_value = value @@ -1233,163 +421,6 @@ async def prepare_openai_completion_params(**params): return completion_params -class OpenAIChatCompletionToLlamaStackMixin: - async def openai_chat_completion( - self, - model: str, - messages: list[OpenAIMessageParam], - frequency_penalty: float | None = None, - function_call: str | dict[str, Any] | None = None, - functions: list[dict[str, Any]] | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_completion_tokens: int | None = None, - max_tokens: int | None = None, - n: int | None = None, - parallel_tool_calls: bool | None = None, - presence_penalty: float | None = None, - response_format: OpenAIResponseFormatParam | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - tool_choice: str | dict[str, Any] | None = None, - tools: list[dict[str, Any]] | None = None, - top_logprobs: int | None = None, - top_p: float | None = None, - user: str | None = None, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - messages = openai_messages_to_messages(messages) # type: ignore[assignment] # converted from OpenAI to LlamaStack message format - response_format = _convert_openai_request_response_format(response_format) - sampling_params = _convert_openai_sampling_params( - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - ) - tool_config = _convert_openai_request_tool_config(tool_choice) - - tools = _convert_openai_request_tools(tools) # type: ignore[assignment] # converted from OpenAI to LlamaStack tool format - if tool_config.tool_choice == ToolChoice.none: - tools = [] # type: ignore[assignment] # empty list narrows return type but mypy tracks broader type - - outstanding_responses = [] - # "n" is the number of completions to generate per prompt - n = n or 1 - for _i in range(0, n): - response = self.chat_completion( # type: ignore[attr-defined] # mixin expects class to implement chat_completion - model_id=model, - messages=messages, - sampling_params=sampling_params, - response_format=response_format, - stream=stream, - tool_config=tool_config, - tools=tools, - ) - outstanding_responses.append(response) - - if stream: - return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) # type: ignore[no-any-return] # mixin async generator return type too complex for mypy - - return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response( - self, model, outstanding_responses - ) - - async def _process_stream_response( - self, - model: str, - outstanding_responses: list[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]], - ): - id = f"chatcmpl-{uuid.uuid4()}" 
- for i, outstanding_response in enumerate(outstanding_responses): - response = await outstanding_response - async for chunk in response: - event = chunk.event - finish_reason = ( - _convert_stop_reason_to_openai_finish_reason(event.stop_reason) if event.stop_reason else None - ) - - if isinstance(event.delta, TextDelta): - text_delta = event.delta.text - delta = OpenAIChoiceDelta(content=text_delta) - yield OpenAIChatCompletionChunk( - id=id, - choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - elif isinstance(event.delta, ToolCallDelta): - if event.delta.parse_status == ToolCallParseStatus.succeeded: - tool_call = event.delta.tool_call - if isinstance(tool_call, str): - continue - - # First chunk includes full structure - openai_tool_call = OpenAIChoiceDeltaToolCall( - index=0, - id=tool_call.call_id, - function=OpenAIChoiceDeltaToolCallFunction( - name=tool_call.tool_name - if isinstance(tool_call.tool_name, str) - else tool_call.tool_name.value, # type: ignore[arg-type] # enum .value extraction on Union confuses mypy - arguments="", - ), - ) - delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) - yield OpenAIChatCompletionChunk( - id=id, - choices=[ - OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union - ], - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - # arguments - openai_tool_call = OpenAIChoiceDeltaToolCall( - index=0, - function=OpenAIChoiceDeltaToolCallFunction( - arguments=tool_call.arguments, - ), - ) - delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) - yield OpenAIChatCompletionChunk( - id=id, - choices=[ - OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union - ], - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - - async def _process_non_stream_response( - self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] - ) -> OpenAIChatCompletion: - choices: list[OpenAIChatCompletionChoice] = [] - for outstanding_response in outstanding_responses: - response = await outstanding_response - completion_message = response.completion_message - message = await convert_message_to_openai_dict_new(completion_message) - finish_reason = _convert_stop_reason_to_openai_finish_reason(completion_message.stop_reason) - - choice = OpenAIChatCompletionChoice( - index=len(choices), - message=message, # type: ignore[arg-type] # OpenAIChatCompletionMessage union incompatible with narrower Message type - finish_reason=finish_reason, - ) - choices.append(choice) # type: ignore[arg-type] # OpenAIChatCompletionChoice type annotation mismatch - - return OpenAIChatCompletion( - id=f"chatcmpl-{uuid.uuid4()}", - choices=choices, # type: ignore[arg-type] # list[OpenAIChatCompletionChoice] union incompatible - created=int(time.time()), - model=model, - object="chat.completion", - ) - - def prepare_openai_embeddings_params( model: str, input: str | list[str], diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index d06b7454d..35a7b3484 100644 --- 
a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -21,19 +21,18 @@ from llama_stack.apis.common.content_types import ( TextContentItem, ) from llama_stack.apis.inference import ( - ChatCompletionRequest, CompletionRequest, - Message, + OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, OpenAIFile, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, ResponseFormat, ResponseFormatType, - SystemMessage, - SystemMessageBehavior, ToolChoice, - ToolDefinition, - UserMessage, ) from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( @@ -42,33 +41,19 @@ from llama_stack.models.llama.datatypes import ( RawMediaItem, RawMessage, RawTextItem, - Role, StopReason, + ToolCall, + ToolDefinition, ToolPromptFormat, ) from llama_stack.models.llama.llama3.chat_format import ChatFormat -from llama_stack.models.llama.llama3.prompt_templates import ( - BuiltinToolGenerator, - FunctionTagCustomToolGenerator, - JsonCustomToolGenerator, - PythonListCustomToolGenerator, - SystemDefaultGenerator, -) from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( - PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, -) from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal -from llama_stack.providers.utils.inference import supported_inference_models log = get_logger(name=__name__, category="providers::utils") -class ChatCompletionRequestWithRawContent(ChatCompletionRequest): - messages: list[RawMessage] - - class CompletionRequestWithRawContent(CompletionRequest): content: RawContent @@ -103,28 +88,6 @@ def interleaved_content_as_str( return _process(content) -async def convert_request_to_raw( - request: ChatCompletionRequest | CompletionRequest, -) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent: - if isinstance(request, ChatCompletionRequest): - messages = [] - for m in request.messages: - content = await interleaved_content_convert_to_raw(m.content) - d = m.model_dump() - d["content"] = content - messages.append(RawMessage(**d)) - - d = request.model_dump() - d["messages"] = messages - request = ChatCompletionRequestWithRawContent(**d) - else: - d = request.model_dump() - d["content"] = await interleaved_content_convert_to_raw(request.content) - request = CompletionRequestWithRawContent(**d) - - return request - - async def interleaved_content_convert_to_raw( content: InterleavedContent, ) -> RawContent: @@ -171,6 +134,36 @@ async def interleaved_content_convert_to_raw( return await _localize_single(content) +async def convert_openai_message_to_raw_message(message: OpenAIMessageParam) -> RawMessage: + """Convert OpenAI message format to RawMessage format used by Llama formatters.""" + if isinstance(message, OpenAIUserMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="user", content=content) + elif isinstance(message, OpenAISystemMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="system", content=content) + elif isinstance(message, OpenAIAssistantMessageParam): + content = await interleaved_content_convert_to_raw(message.content 
or "") # type: ignore[arg-type] + tool_calls = [] + if message.tool_calls: + for tc in message.tool_calls: + if tc.function: + tool_calls.append( + ToolCall( + call_id=tc.id or "", + tool_name=tc.function.name or "", + arguments=tc.function.arguments or "{}", + ) + ) + return RawMessage(role="assistant", content=content, tool_calls=tool_calls) + elif isinstance(message, OpenAIToolMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="tool", content=content) + else: + # Handle OpenAIDeveloperMessageParam if needed + raise ValueError(f"Unsupported message type: {type(message)}") + + def content_has_media(content: InterleavedContent): def _has_media_content(c): return isinstance(c, ImageContentItem) @@ -181,17 +174,6 @@ def content_has_media(content: InterleavedContent): return _has_media_content(content) -def messages_have_media(messages: list[Message]): - return any(content_has_media(m.content) for m in messages) - - -def request_has_media(request: ChatCompletionRequest | CompletionRequest): - if isinstance(request, ChatCompletionRequest): - return messages_have_media(request.messages) - else: - return content_has_media(request.content) - - async def localize_image_content(uri: str) -> tuple[bytes, str] | None: if uri.startswith("http"): async with httpx.AsyncClient() as client: @@ -253,79 +235,6 @@ def augment_content_with_response_format_prompt(response_format, content): return content -async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llama_model: str) -> str: - messages = chat_completion_request_to_messages(request, llama_model) - request.messages = messages - request = await convert_request_to_raw(request) - - formatter = ChatFormat(tokenizer=Tokenizer.get_instance()) - model_input = formatter.encode_dialog_prompt( - request.messages, - tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model), - ) - return formatter.tokenizer.decode(model_input.tokens) - - -async def chat_completion_request_to_model_input_info( - request: ChatCompletionRequest, llama_model: str -) -> tuple[str, int]: - messages = chat_completion_request_to_messages(request, llama_model) - request.messages = messages - request = await convert_request_to_raw(request) - - formatter = ChatFormat(tokenizer=Tokenizer.get_instance()) - model_input = formatter.encode_dialog_prompt( - request.messages, - tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model), - ) - return ( - formatter.tokenizer.decode(model_input.tokens), - len(model_input.tokens), - ) - - -def chat_completion_request_to_messages( - request: ChatCompletionRequest, - llama_model: str, -) -> list[Message]: - """Reads chat completion request and augments the messages to handle tools. - For eg. for llama_3_1, add system message with the appropriate tools or - add user messsage for custom tools, etc. - """ - assert llama_model is not None, "llama_model is required" - model = resolve_model(llama_model) - if model is None: - log.error(f"Could not resolve model {llama_model}") - return request.messages - - allowed_models = supported_inference_models() - descriptors = [m.descriptor() for m in allowed_models] - if model.descriptor() not in descriptors: - log.error(f"Unsupported inference model? 
{model.descriptor()}") - return request.messages - - if model.model_family == ModelFamily.llama3_1 or ( - model.model_family == ModelFamily.llama3_2 and is_multimodal(model.core_model_id) - ): - # llama3.1 and llama3.2 multimodal models follow the same tool prompt format - messages = augment_messages_for_tools_llama_3_1(request) - elif model.model_family in ( - ModelFamily.llama3_2, - ModelFamily.llama3_3, - ): - # llama3.2, llama3.3 follow the same tool prompt format - messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator) - elif model.model_family == ModelFamily.llama4: - messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4) - else: - messages = request.messages - - if fmt_prompt := response_format_prompt(request.response_format): - messages.append(UserMessage(content=fmt_prompt)) - - return messages - - def response_format_prompt(fmt: ResponseFormat | None): if not fmt: return None @@ -338,128 +247,6 @@ def response_format_prompt(fmt: ResponseFormat | None): raise ValueError(f"Unknown response format {fmt.type}") -def augment_messages_for_tools_llama_3_1( - request: ChatCompletionRequest, -) -> list[Message]: - existing_messages = request.messages - existing_system_message = None - if existing_messages[0].role == Role.system.value: - existing_system_message = existing_messages.pop(0) - - assert existing_messages[0].role != Role.system.value, "Should only have 1 system message" - - messages = [] - - default_gen = SystemDefaultGenerator() - default_template = default_gen.gen() - - sys_content = "" - - tool_template = None - if request.tools: - tool_gen = BuiltinToolGenerator() - tool_template = tool_gen.gen(request.tools) - - sys_content += tool_template.render() - sys_content += "\n" - - sys_content += default_template.render() - - if existing_system_message: - # TODO: this fn is needed in many places - def _process(c): - if isinstance(c, str): - return c - else: - return "" - - sys_content += "\n" - - if isinstance(existing_system_message.content, str): - sys_content += _process(existing_system_message.content) - elif isinstance(existing_system_message.content, list): - sys_content += "\n".join([_process(c) for c in existing_system_message.content]) - - tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools) - if tool_choice_prompt: - sys_content += "\n" + tool_choice_prompt - - messages.append(SystemMessage(content=sys_content)) - - has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools) - if has_custom_tools: - fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json - if fmt == ToolPromptFormat.json: - tool_gen = JsonCustomToolGenerator() - elif fmt == ToolPromptFormat.function_tag: - tool_gen = FunctionTagCustomToolGenerator() - else: - raise ValueError(f"Non supported ToolPromptFormat {fmt}") - - custom_tools = [t for t in request.tools if isinstance(t.tool_name, str)] - custom_template = tool_gen.gen(custom_tools) - messages.append(UserMessage(content=custom_template.render())) - - # Add back existing messages from the request - messages += existing_messages - - return messages - - -def augment_messages_for_tools_llama( - request: ChatCompletionRequest, - custom_tool_prompt_generator, -) -> list[Message]: - existing_messages = request.messages - existing_system_message = None - if existing_messages[0].role == Role.system.value: - existing_system_message = existing_messages.pop(0) - - assert 
existing_messages[0].role != Role.system.value, "Should only have 1 system message" - - sys_content = "" - custom_tools, builtin_tools = [], [] - for t in request.tools: - if isinstance(t.tool_name, str): - custom_tools.append(t) - else: - builtin_tools.append(t) - - if builtin_tools: - tool_gen = BuiltinToolGenerator() - tool_template = tool_gen.gen(builtin_tools) - - sys_content += tool_template.render() - sys_content += "\n" - - custom_tools = [dfn for dfn in request.tools if isinstance(dfn.tool_name, str)] - if custom_tools: - fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.python_list - if fmt != ToolPromptFormat.python_list: - raise ValueError(f"Non supported ToolPromptFormat {request.tool_config.tool_prompt_format}") - - system_prompt = None - if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace: - system_prompt = existing_system_message.content - - tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt) - - sys_content += tool_template.render() - sys_content += "\n" - - if existing_system_message and ( - request.tool_config.system_message_behavior == SystemMessageBehavior.append or not custom_tools - ): - sys_content += interleaved_content_as_str(existing_system_message.content, sep="\n") - - tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools) - if tool_choice_prompt: - sys_content += "\n" + tool_choice_prompt - - messages = [SystemMessage(content=sys_content.strip("\n")), *existing_messages] - return messages - - def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str: if tool_choice == ToolChoice.auto: return "" diff --git a/tests/unit/models/test_prompt_adapter.py b/tests/unit/models/test_prompt_adapter.py deleted file mode 100644 index d31426135..000000000 --- a/tests/unit/models/test_prompt_adapter.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionMessage, - StopReason, - SystemMessage, - SystemMessageBehavior, - ToolCall, - ToolConfig, - UserMessage, -) -from llama_stack.models.llama.datatypes import ( - BuiltinTool, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_messages, - chat_completion_request_to_prompt, - interleaved_content_as_str, -) - -MODEL = "Llama3.1-8B-Instruct" -MODEL3_2 = "Llama3.2-3B-Instruct" - - -async def test_system_default(): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - assert len(messages) == 2 - assert messages[-1].content == content - assert "Cutting Knowledge Date: December 2023" in interleaved_content_as_str(messages[0].content) - - -async def test_system_builtin_only(): - content = "Hello !" 
- request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition(tool_name=BuiltinTool.brave_search), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - assert len(messages) == 2 - assert messages[-1].content == content - assert "Cutting Knowledge Date: December 2023" in interleaved_content_as_str(messages[0].content) - assert "Tools: brave_search" in interleaved_content_as_str(messages[0].content) - - -async def test_system_custom_only(): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - input_schema={ - "type": "object", - "properties": { - "param1": { - "type": "str", - "description": "param1 description", - }, - }, - "required": ["param1"], - }, - ) - ], - tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.json), - ) - messages = chat_completion_request_to_messages(request, MODEL) - assert len(messages) == 3 - assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - - assert "Return function calls in JSON format" in interleaved_content_as_str(messages[1].content) - assert messages[-1].content == content - - -async def test_system_custom_and_builtin(): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition(tool_name=BuiltinTool.brave_search), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - input_schema={ - "type": "object", - "properties": { - "param1": { - "type": "str", - "description": "param1 description", - }, - }, - "required": ["param1"], - }, - ), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - assert len(messages) == 3 - - assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - assert "Tools: brave_search" in interleaved_content_as_str(messages[0].content) - - assert "Return function calls in JSON format" in interleaved_content_as_str(messages[1].content) - assert messages[-1].content == content - - -async def test_completion_message_encoding(): - request = ChatCompletionRequest( - model=MODEL3_2, - messages=[ - UserMessage(content="hello"), - CompletionMessage( - content="", - stop_reason=StopReason.end_of_turn, - tool_calls=[ - ToolCall( - tool_name="custom1", - arguments='{"param1": "value1"}', # arguments must be a JSON string - call_id="123", - ) - ], - ), - ], - tools=[ - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - input_schema={ - "type": "object", - "properties": { - "param1": { - "type": "str", - "description": "param1 description", - }, - }, - "required": ["param1"], - }, - ), - ], - tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.python_list), - ) - prompt = await chat_completion_request_to_prompt(request, request.model) - assert '[custom1(param1="value1")]' in prompt - - request.model = MODEL - request.tool_config = ToolConfig(tool_prompt_format=ToolPromptFormat.json) - prompt = await chat_completion_request_to_prompt(request, request.model) - assert '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}' in prompt - - -async def test_user_provided_system_message(): - content = "Hello !" 
- system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - assert len(messages) == 2 - assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) - - assert messages[-1].content == content - - -async def test_replace_system_message_behavior_builtin_tools(): - content = "Hello !" - system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format=ToolPromptFormat.python_list, - system_message_behavior=SystemMessageBehavior.replace, - ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) - assert len(messages) == 2 - assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) - assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - assert messages[-1].content == content - - -async def test_replace_system_message_behavior_custom_tools(): - content = "Hello !" - system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - input_schema={ - "type": "object", - "properties": { - "param1": { - "type": "str", - "description": "param1 description", - }, - }, - "required": ["param1"], - }, - ), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format=ToolPromptFormat.python_list, - system_message_behavior=SystemMessageBehavior.replace, - ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) - - assert len(messages) == 2 - assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) - assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - assert messages[-1].content == content - - -async def test_replace_system_message_behavior_custom_tools_with_template(): - content = "Hello !" 
- system_prompt = "You are a pirate {{ function_description }}" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - input_schema={ - "type": "object", - "properties": { - "param1": { - "type": "str", - "description": "param1 description", - }, - }, - "required": ["param1"], - }, - ), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format=ToolPromptFormat.python_list, - system_message_behavior=SystemMessageBehavior.replace, - ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) - - assert len(messages) == 2 - assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - assert "You are a pirate" in interleaved_content_as_str(messages[0].content) - # function description is present in the system prompt - assert '"name": "custom1"' in interleaved_content_as_str(messages[0].content) - assert messages[-1].content == content diff --git a/tests/unit/providers/inline/inference/__init__.py b/tests/unit/providers/inline/inference/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/inference/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/inference/test_meta_reference.py b/tests/unit/providers/inline/inference/test_meta_reference.py new file mode 100644 index 000000000..381836397 --- /dev/null +++ b/tests/unit/providers/inline/inference/test_meta_reference.py @@ -0,0 +1,44 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from unittest.mock import Mock + +import pytest + +from llama_stack.providers.inline.inference.meta_reference.model_parallel import ( + ModelRunner, +) + + +class TestModelRunner: + """Test ModelRunner task dispatching for model-parallel inference.""" + + def test_chat_completion_task_dispatch(self): + """Verify ModelRunner correctly dispatches chat_completion tasks.""" + # Create a mock generator + mock_generator = Mock() + mock_generator.chat_completion = Mock(return_value=iter([])) + + runner = ModelRunner(mock_generator) + + # Create a chat_completion task + fake_params = {"model": "test"} + fake_messages = [{"role": "user", "content": "test"}] + task = ("chat_completion", [fake_params, fake_messages]) + + # Execute task + runner(task) + + # Verify chat_completion was called with correct arguments + mock_generator.chat_completion.assert_called_once_with(fake_params, fake_messages) + + def test_invalid_task_type_raises_error(self): + """Verify ModelRunner rejects invalid task types.""" + mock_generator = Mock() + runner = ModelRunner(mock_generator) + + with pytest.raises(ValueError, match="Unexpected task type"): + runner(("invalid_task", [])) diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 922d7f61f..622302630 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -10,11 +10,13 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference import CompletionMessage, UserMessage +from llama_stack.apis.inference import ( + OpenAIAssistantMessageParam, + OpenAIUserMessageParam, +) from llama_stack.apis.resource import ResourceType from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield -from llama_stack.models.llama.datatypes import StopReason from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter @@ -136,11 +138,9 @@ async def test_run_shield_allowed(nvidia_adapter, mock_guardrails_post): # Run the shield messages = [ - UserMessage(role="user", content="Hello, how are you?"), - CompletionMessage( - role="assistant", + OpenAIUserMessageParam(content="Hello, how are you?"), + OpenAIAssistantMessageParam( content="I'm doing well, thank you for asking!", - stop_reason=StopReason.end_of_message, tool_calls=[], ), ] @@ -191,13 +191,10 @@ async def test_run_shield_blocked(nvidia_adapter, mock_guardrails_post): # Mock Guardrails API response mock_guardrails_post.return_value = {"status": "blocked", "rails_status": {"reason": "harmful_content"}} - # Run the shield messages = [ - UserMessage(role="user", content="Hello, how are you?"), - CompletionMessage( - role="assistant", + OpenAIUserMessageParam(content="Hello, how are you?"), + OpenAIAssistantMessageParam( content="I'm doing well, thank you for asking!", - stop_reason=StopReason.end_of_message, tool_calls=[], ), ] @@ -243,7 +240,7 @@ async def test_run_shield_not_found(nvidia_adapter, mock_guardrails_post): adapter.shield_store.get_shield.return_value = None messages = [ - UserMessage(role="user", content="Hello, how are you?"), + OpenAIUserMessageParam(content="Hello, how are you?"), ] with pytest.raises(ValueError): @@ -274,11 +271,9 @@ async def test_run_shield_http_error(nvidia_adapter, mock_guardrails_post): # Running the shield should raise an exception messages = [ - UserMessage(role="user", content="Hello, how are 
you?"), - CompletionMessage( - role="assistant", + OpenAIUserMessageParam(content="Hello, how are you?"), + OpenAIAssistantMessageParam( content="I'm doing well, thank you for asking!", - stop_reason=StopReason.end_of_message, tool_calls=[], ), ] diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py deleted file mode 100644 index c200c4395..000000000 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import pytest -from pydantic import ValidationError - -from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference import ( - CompletionMessage, - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, - SystemMessage, - UserMessage, -) -from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall -from llama_stack.providers.utils.inference.openai_compat import ( - convert_message_to_openai_dict, - convert_message_to_openai_dict_new, - openai_messages_to_messages, -) - - -async def test_convert_message_to_openai_dict(): - message = UserMessage(content=[TextContentItem(text="Hello, world!")], role="user") - assert await convert_message_to_openai_dict(message) == { - "role": "user", - "content": [{"type": "text", "text": "Hello, world!"}], - } - - -# Test convert_message_to_openai_dict with a tool call -async def test_convert_message_to_openai_dict_with_tool_call(): - message = CompletionMessage( - content="", - tool_calls=[ToolCall(call_id="123", tool_name="test_tool", arguments='{"foo": "bar"}')], - stop_reason=StopReason.end_of_turn, - ) - - openai_dict = await convert_message_to_openai_dict(message) - - assert openai_dict == { - "role": "assistant", - "content": [{"type": "text", "text": ""}], - "tool_calls": [ - {"id": "123", "type": "function", "function": {"name": "test_tool", "arguments": '{"foo": "bar"}'}} - ], - } - - -async def test_convert_message_to_openai_dict_with_builtin_tool_call(): - message = CompletionMessage( - content="", - tool_calls=[ - ToolCall( - call_id="123", - tool_name=BuiltinTool.brave_search, - arguments='{"foo": "bar"}', - ) - ], - stop_reason=StopReason.end_of_turn, - ) - - openai_dict = await convert_message_to_openai_dict(message) - - assert openai_dict == { - "role": "assistant", - "content": [{"type": "text", "text": ""}], - "tool_calls": [ - {"id": "123", "type": "function", "function": {"name": "brave_search", "arguments": '{"foo": "bar"}'}} - ], - } - - -async def test_openai_messages_to_messages_with_content_str(): - openai_messages = [ - OpenAISystemMessageParam(content="system message"), - OpenAIUserMessageParam(content="user message"), - OpenAIAssistantMessageParam(content="assistant message"), - ] - - llama_messages = openai_messages_to_messages(openai_messages) - assert len(llama_messages) == 3 - assert isinstance(llama_messages[0], SystemMessage) - assert isinstance(llama_messages[1], UserMessage) - assert isinstance(llama_messages[2], CompletionMessage) - assert llama_messages[0].content == "system message" - assert llama_messages[1].content == "user message" - assert 
llama_messages[2].content == "assistant message" - - -async def test_openai_messages_to_messages_with_content_list(): - openai_messages = [ - OpenAISystemMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="system message")]), - OpenAIUserMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="user message")]), - OpenAIAssistantMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="assistant message")]), - ] - - llama_messages = openai_messages_to_messages(openai_messages) - assert len(llama_messages) == 3 - assert isinstance(llama_messages[0], SystemMessage) - assert isinstance(llama_messages[1], UserMessage) - assert isinstance(llama_messages[2], CompletionMessage) - assert llama_messages[0].content[0].text == "system message" - assert llama_messages[1].content[0].text == "user message" - assert llama_messages[2].content[0].text == "assistant message" - - -@pytest.mark.parametrize( - "message_class,kwargs", - [ - (OpenAISystemMessageParam, {}), - (OpenAIAssistantMessageParam, {}), - (OpenAIDeveloperMessageParam, {}), - (OpenAIUserMessageParam, {}), - (OpenAIToolMessageParam, {"tool_call_id": "call_123"}), - ], -) -def test_message_accepts_text_string(message_class, kwargs): - """Test that messages accept string text content.""" - msg = message_class(content="Test message", **kwargs) - assert msg.content == "Test message" - - -@pytest.mark.parametrize( - "message_class,kwargs", - [ - (OpenAISystemMessageParam, {}), - (OpenAIAssistantMessageParam, {}), - (OpenAIDeveloperMessageParam, {}), - (OpenAIUserMessageParam, {}), - (OpenAIToolMessageParam, {"tool_call_id": "call_123"}), - ], -) -def test_message_accepts_text_list(message_class, kwargs): - """Test that messages accept list of text content parts.""" - content_list = [OpenAIChatCompletionContentPartTextParam(text="Test message")] - msg = message_class(content=content_list, **kwargs) - assert len(msg.content) == 1 - assert msg.content[0].text == "Test message" - - -@pytest.mark.parametrize( - "message_class,kwargs", - [ - (OpenAISystemMessageParam, {}), - (OpenAIAssistantMessageParam, {}), - (OpenAIDeveloperMessageParam, {}), - (OpenAIToolMessageParam, {"tool_call_id": "call_123"}), - ], -) -def test_message_rejects_images(message_class, kwargs): - """Test that system, assistant, developer, and tool messages reject image content.""" - with pytest.raises(ValidationError): - message_class( - content=[ - OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url="http://example.com/image.jpg")) - ], - **kwargs, - ) - - -def test_user_message_accepts_images(): - """Test that user messages accept image content (unlike other message types).""" - # List with images should work - msg = OpenAIUserMessageParam( - content=[ - OpenAIChatCompletionContentPartTextParam(text="Describe this image:"), - OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url="http://example.com/image.jpg")), - ] - ) - assert len(msg.content) == 2 - assert msg.content[0].text == "Describe this image:" - assert msg.content[1].image_url.url == "http://example.com/image.jpg" - - -async def test_convert_message_to_openai_dict_new_user_message(): - """Test convert_message_to_openai_dict_new with UserMessage.""" - message = UserMessage(content="Hello, world!", role="user") - result = await convert_message_to_openai_dict_new(message) - - assert result["role"] == "user" - assert result["content"] == "Hello, world!" 
-
-
-async def test_convert_message_to_openai_dict_new_completion_message_with_tool_calls():
-    """Test convert_message_to_openai_dict_new with CompletionMessage containing tool calls."""
-    message = CompletionMessage(
-        content="I'll help you find the weather.",
-        tool_calls=[
-            ToolCall(
-                call_id="call_123",
-                tool_name="get_weather",
-                arguments='{"city": "Sligo"}',
-            )
-        ],
-        stop_reason=StopReason.end_of_turn,
-    )
-    result = await convert_message_to_openai_dict_new(message)
-
-    # This would have failed with "Cannot instantiate typing.Union" before the fix
-    assert result["role"] == "assistant"
-    assert result["content"] == "I'll help you find the weather."
-    assert "tool_calls" in result
-    assert result["tool_calls"] is not None
-    assert len(result["tool_calls"]) == 1
-
-    tool_call = result["tool_calls"][0]
-    assert tool_call.id == "call_123"
-    assert tool_call.type == "function"
-    assert tool_call.function.name == "get_weather"
-    assert tool_call.function.arguments == '{"city": "Sligo"}'
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
new file mode 100644
index 000000000..62c8db74d
--- /dev/null
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import (
+    OpenAIAssistantMessageParam,
+    OpenAIUserMessageParam,
+)
+from llama_stack.models.llama.datatypes import RawTextItem
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    convert_openai_message_to_raw_message,
+)
+
+
+class TestConvertOpenAIMessageToRawMessage:
+    """Test conversion of OpenAI message types to RawMessage format."""
+
+    async def test_user_message_conversion(self):
+        msg = OpenAIUserMessageParam(role="user", content="Hello world")
+        raw_msg = await convert_openai_message_to_raw_message(msg)
+
+        assert raw_msg.role == "user"
+        assert isinstance(raw_msg.content, RawTextItem)
+        assert raw_msg.content.text == "Hello world"
+
+    async def test_assistant_message_conversion(self):
+        msg = OpenAIAssistantMessageParam(role="assistant", content="Hi there!")
+        raw_msg = await convert_openai_message_to_raw_message(msg)
+
+        assert raw_msg.role == "assistant"
+        assert isinstance(raw_msg.content, RawTextItem)
+        assert raw_msg.content.text == "Hi there!"
+        assert raw_msg.tool_calls == []

From 97ccfb5e626919956aee0bf0e890a7196e8af6a2 Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <31703736+nathan-weinberg@users.noreply.github.com>
Date: Mon, 10 Nov 2025 18:57:17 -0500
Subject: [PATCH 07/62] refactor: inspect routes now shows all non-deprecated APIs (#4116)

# What does this PR do?
The inspect API lacked any mechanism to get all non-deprecated APIs (v1, v1alpha, v1beta); this change makes returning all of them the default behavior.

The 'v1' filter can be used by users wanting a list of only the stable APIs.

## Test Plan
1. pull the PR
2. launch an LLS server
3. run `curl http://beanlab3.bss.redhat.com:8321/v1/inspect/routes`
4.
note there are APIs for `v1`, `v1alpha`, and `v1beta` but no deprecated APIs Signed-off-by: Nathan Weinberg --- client-sdks/stainless/openapi.yml | 2 +- docs/static/llama-stack-spec.yaml | 2 +- docs/static/stainless-llama-stack-spec.yaml | 2 +- src/llama_stack/apis/inspect/inspect.py | 2 +- src/llama_stack/core/inspect.py | 5 ++--- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 58ebaa8ae..9f3ef15b5 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -963,7 +963,7 @@ paths: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, - returns only non-deprecated v1 routes. + returns all non-deprecated routes. required: false schema: type: string diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 135ae910f..ce8708b68 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -960,7 +960,7 @@ paths: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, - returns only non-deprecated v1 routes. + returns all non-deprecated routes. required: false schema: type: string diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 58ebaa8ae..9f3ef15b5 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -963,7 +963,7 @@ paths: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, - returns only non-deprecated v1 routes. + returns all non-deprecated routes. required: false schema: type: string diff --git a/src/llama_stack/apis/inspect/inspect.py b/src/llama_stack/apis/inspect/inspect.py index 4e0e2548b..235abb124 100644 --- a/src/llama_stack/apis/inspect/inspect.py +++ b/src/llama_stack/apis/inspect/inspect.py @@ -76,7 +76,7 @@ class Inspect(Protocol): List all available API routes with their methods and implementing providers. - :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes. + :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes. :returns: Response containing information about all available routes. """ ... 
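
For concreteness, here is a minimal sketch of exercising the endpoint over raw HTTP. It is illustrative only: it assumes a local server on port 8321, that the filter is exposed as the `api_filter` query parameter, and that the response carries a `data` list of route entries with `method` and `route` fields.

```python
# Illustrative sketch -- not part of this patch.
import requests

BASE_URL = "http://localhost:8321"  # assumed local server

# New default: every non-deprecated route across v1, v1alpha, and v1beta.
all_routes = requests.get(f"{BASE_URL}/v1/inspect/routes").json()

# Previous default, now opt-in: only the stable v1 routes.
v1_routes = requests.get(
    f"{BASE_URL}/v1/inspect/routes", params={"api_filter": "v1"}
).json()

# Deprecated routes across all API levels.
deprecated = requests.get(
    f"{BASE_URL}/v1/inspect/routes", params={"api_filter": "deprecated"}
).json()

# Assumed response shape: {"data": [{"route": ..., "method": ...}, ...]}
for route in all_routes["data"]:
    print(route["method"], route["route"])
```
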
diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
index 6352af00f..07b51128f 100644
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@@ -15,7 +15,6 @@ from llama_stack.apis.inspect import (
     RouteInfo,
     VersionInfo,
 )
-from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.core.external import load_external_apis
 from llama_stack.core.server.routes import get_all_api_routes
@@ -46,8 +45,8 @@ class DistributionInspectImpl(Inspect):
         # Helper function to determine if a route should be included based on api_filter
         def should_include_route(webmethod) -> bool:
             if api_filter is None:
-                # Default: only non-deprecated v1 APIs
-                return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1
+                # Default: only non-deprecated APIs
+                return not webmethod.deprecated
             elif api_filter == "deprecated":
                 # Special filter: show deprecated routes regardless of their actual level
                 return bool(webmethod.deprecated)

From e5a55f36776575f89145be1004935e8113aed90a Mon Sep 17 00:00:00 2001
From: paulengineer <154521137+paulengineer@users.noreply.github.com>
Date: Tue, 11 Nov 2025 12:49:03 +0000
Subject: [PATCH 08/62] docs: use 'uv pip' to avoid pitfalls of using 'pip' in virtual environment (#4122)

# What does this PR do?
In the **Detailed Tutorial**, at **Step 3**, the **Install with venv** option creates a new virtual environment `client`, activates it, and then attempts to install the llama-stack-client using pip.
```
uv venv client --python 3.12
source client/bin/activate
pip install llama-stack-client  <- this is the problematic line
```
However, the pip command will likely fail because the `uv venv` command doesn't, by default, include pip in the virtual environment it creates. The pip command will error either because pip doesn't exist at all or, if a pip command does exist outside of the virtual environment, return a different error message. In the latter case it may be unclear to the user why the install is failing.

This PR changes 'pip' to 'uv pip', allowing the install action to work in the virtual environment as intended, and without the need for pip to be installed.

## Test Plan
1. Use Linux or WSL (virtual environments on Windows use a `Scripts` folder instead of `bin` [virtualenv #993ba13](https://github.com/pypa/virtualenv/commit/993ba1316a83b760370f5a3872b3f5ef4dd904c1), which doesn't align with the tutorial)
2. Clone the `llama-stack` repo
3. Run the following and verify success:
```
uv venv client --python 3.12
source client/bin/activate
```
4. Run the updated command:
```
uv pip install llama-stack-client
```
5.
Observe the console output confirms that the virtual environment `client` was used: > Using Python 3.12.3 environment at: **client** --- docs/docs/getting_started/detailed_tutorial.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index 623301d0d..2816f67a2 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx @@ -144,7 +144,7 @@ source .venv/bin/activate ```bash uv venv client --python 3.12 source client/bin/activate -pip install llama-stack-client +uv pip install llama-stack-client ``` From 71b328fc4bddd672a0665a15484ee64e5464c62a Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 11 Nov 2025 10:40:31 -0800 Subject: [PATCH 09/62] chore(ui): add npm package and dockerfile (#4100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - sets up package.json for npm `llama-stack-ui` package (will update llama-stack-ops) - adds dockerfile for UI docker image ## Test Plan npx: npm build && npm pack LLAMA_STACK_UI_PORT=8322 npx /Users/erichuang/projects/ui/src/llama_stack_ui/llama-stack-ui-0.4.0-alpha.2.tgz docker: cd src/llama_stack_ui docker build . -f Dockerfile --tag test_ui --no-cache ❯ docker run -p 8322:8322 \ -e LLAMA_STACK_UI_PORT=8322 \ test_ui:latest --- docs/docs/distributions/index.mdx | 1 + docs/docs/distributions/llama_stack_ui.mdx | 109 +++++++++++++++++++++ docs/sidebars.ts | 1 + src/llama_stack_ui/.dockerignore | 20 ++++ src/llama_stack_ui/Containerfile | 18 ++++ src/llama_stack_ui/bin/cli.js | 34 +++++++ src/llama_stack_ui/next.config.ts | 8 +- src/llama_stack_ui/package-lock.json | 16 +-- src/llama_stack_ui/package.json | 30 +++++- src/llama_stack_ui/scripts/postbuild.js | 40 ++++++++ 10 files changed, 264 insertions(+), 13 deletions(-) create mode 100644 docs/docs/distributions/llama_stack_ui.mdx create mode 100644 src/llama_stack_ui/.dockerignore create mode 100644 src/llama_stack_ui/Containerfile create mode 100755 src/llama_stack_ui/bin/cli.js create mode 100644 src/llama_stack_ui/scripts/postbuild.js diff --git a/docs/docs/distributions/index.mdx b/docs/docs/distributions/index.mdx index 0149f143f..ebf4bd6ce 100644 --- a/docs/docs/distributions/index.mdx +++ b/docs/docs/distributions/index.mdx @@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack. - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code - **[Configuration Reference](./configuration.mdx)** - Configuration file format details +- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers diff --git a/docs/docs/distributions/llama_stack_ui.mdx b/docs/docs/distributions/llama_stack_ui.mdx new file mode 100644 index 000000000..7ba47ea4d --- /dev/null +++ b/docs/docs/distributions/llama_stack_ui.mdx @@ -0,0 +1,109 @@ +--- +title: Llama Stack UI +description: Web-based user interface for interacting with Llama Stack servers +sidebar_label: Llama Stack UI +sidebar_position: 8 +--- + +# Llama Stack UI + +The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs. 
+ +## Features + +- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity +- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows +- **Prompt Management**: Create and manage reusable prompts + +## Prerequisites + +You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend. + +If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](../getting_started/starting_llama_stack_server.mdx) guide. + +## Running the UI + +### Option 1: Using npx (Recommended for Quick Start) + +The fastest way to get started is using `npx`: + +```bash +npx llama-stack-ui +``` + +This will start the UI server on `http://localhost:8322` (default port). + +### Option 2: Using Docker + +Run the UI in a container: + +```bash +docker run -p 8322:8322 llamastack/ui +``` + +Access the UI at `http://localhost:8322`. + +## Environment Variables + +The UI can be configured using the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` | +| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` | + +If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` | +| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - | +| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - | + +### Setting Environment Variables + +#### For npx: + +```bash +LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ +LLAMA_STACK_UI_PORT=8080 \ +npx llama-stack-ui +``` + +#### For Docker: + +```bash +docker run -p 8080:8080 \ + -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ + -e LLAMA_STACK_UI_PORT=8080 \ + llamastack/ui +``` + +## Using the UI + +### Managing Resources + +- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings +- **Prompts**: Create and manage reusable prompt templates +- **Chat Completions**: View history of chat interactions +- **Responses**: Browse detailed agent responses and tool calls + +## Development + +If you want to run the UI from source for development: + +```bash +# From the project root +cd src/llama_stack_ui + +# Install dependencies +npm install + +# Set environment variables +export LLAMA_STACK_BACKEND_URL=http://localhost:8321 + +# Start the development server +npm run dev +``` + +The development server will start on `http://localhost:8322` with hot reloading enabled. 
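
If the UI loads but its pages stay empty, the backend URL is the usual culprit. The following is a quick reachability probe for the server the UI talks to; it is a minimal sketch that assumes the default backend port 8321 and the standard `/v1/health` and `/v1/models` routes, so adjust it to match your deployment:

```python
# Hypothetical sanity check for the backend behind LLAMA_STACK_BACKEND_URL.
# Assumes the default port (8321) and the /v1/health and /v1/models routes.
import os

import httpx

backend = os.environ.get("LLAMA_STACK_BACKEND_URL", "http://localhost:8321")

# The UI proxies its API calls to this server; if this fails, the UI will too.
health = httpx.get(f"{backend}/v1/health", timeout=5.0)
health.raise_for_status()
print("backend health:", health.json())

# Model listings back the UI's pickers (e.g. the embedding-model dropdown).
models = httpx.get(f"{backend}/v1/models", timeout=5.0)
models.raise_for_status()
print("models visible to the UI:", len(models.json().get("data", [])))
```

Run it with the same environment variables you pass to the UI; a failure here means the UI will fail the same way.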
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 641c2eed3..7b4ac5ac8 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = { 'distributions/importing_as_library', 'distributions/configuration', 'distributions/starting_llama_stack_server', + 'distributions/llama_stack_ui', { type: 'category', label: 'Self-Hosted Distributions', diff --git a/src/llama_stack_ui/.dockerignore b/src/llama_stack_ui/.dockerignore new file mode 100644 index 000000000..e3d1daae6 --- /dev/null +++ b/src/llama_stack_ui/.dockerignore @@ -0,0 +1,20 @@ +.git +.gitignore +.env.local +.env.*.local +.next +node_modules +npm-debug.log +*.md +.DS_Store +.vscode +.idea +playwright-report +e2e +jest.config.ts +jest.setup.ts +eslint.config.mjs +.prettierrc +.prettierignore +.nvmrc +playwright.config.ts diff --git a/src/llama_stack_ui/Containerfile b/src/llama_stack_ui/Containerfile new file mode 100644 index 000000000..6aea3dbfd --- /dev/null +++ b/src/llama_stack_ui/Containerfile @@ -0,0 +1,18 @@ +FROM node:22.5.1-alpine + +ENV NODE_ENV=production + +# Install dumb-init for proper signal handling +RUN apk add --no-cache dumb-init + +# Create non-root user for security +RUN addgroup --system --gid 1001 nodejs +RUN adduser --system --uid 1001 nextjs + +# Install llama-stack-ui from npm +RUN npm install -g llama-stack-ui + +USER nextjs + +ENTRYPOINT ["dumb-init", "--"] +CMD ["llama-stack-ui"] diff --git a/src/llama_stack_ui/bin/cli.js b/src/llama_stack_ui/bin/cli.js new file mode 100755 index 000000000..6069d2f22 --- /dev/null +++ b/src/llama_stack_ui/bin/cli.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node + +const { spawn } = require('child_process'); +const path = require('path'); + +const port = process.env.LLAMA_STACK_UI_PORT || 8322; +const uiDir = path.resolve(__dirname, '..'); +const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js'); +const serverDir = path.dirname(serverPath); + +console.log(`Starting Llama Stack UI on http://localhost:${port}`); + +const child = spawn(process.execPath, [serverPath], { + cwd: serverDir, + stdio: 'inherit', + env: { + ...process.env, + PORT: port, + }, +}); + +process.on('SIGINT', () => { + child.kill('SIGINT'); + process.exit(0); +}); + +process.on('SIGTERM', () => { + child.kill('SIGTERM'); + process.exit(0); +}); + +child.on('exit', (code) => { + process.exit(code); +}); diff --git a/src/llama_stack_ui/next.config.ts b/src/llama_stack_ui/next.config.ts index e9ffa3083..9f4a74eca 100644 --- a/src/llama_stack_ui/next.config.ts +++ b/src/llama_stack_ui/next.config.ts @@ -1,7 +1,13 @@ import type { NextConfig } from "next"; const nextConfig: NextConfig = { - /* config options here */ + typescript: { + ignoreBuildErrors: true, + }, + output: "standalone", + images: { + unoptimized: true, + }, }; export default nextConfig; diff --git a/src/llama_stack_ui/package-lock.json b/src/llama_stack_ui/package-lock.json index 14e34b720..aa8b2ac26 100644 --- a/src/llama_stack_ui/package-lock.json +++ b/src/llama_stack_ui/package-lock.json @@ -1,12 +1,13 @@ { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", + "license": "MIT", "dependencies": { "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", @@ -20,7 +21,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", 
"framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", @@ -9684,8 +9685,9 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.4.0-alpha.1", - "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f", + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz", + "integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", diff --git a/src/llama_stack_ui/package.json b/src/llama_stack_ui/package.json index fb7dbee75..41afc9a11 100644 --- a/src/llama_stack_ui/package.json +++ b/src/llama_stack_ui/package.json @@ -1,11 +1,31 @@ { - "name": "ui", - "version": "0.1.0", - "private": true, + "name": "llama-stack-ui", + "version": "0.4.0-alpha.4", + "description": "Web UI for Llama Stack", + "license": "MIT", + "author": "Llama Stack ", + "repository": { + "type": "git", + "url": "https://github.com/llamastack/llama-stack.git", + "directory": "llama_stack_ui" + }, + "bin": { + "llama-stack-ui": "bin/cli.js" + }, + "files": [ + "bin", + ".next", + "public", + "next.config.ts", + "instrumentation.ts", + "tsconfig.json", + "package.json" + ], "scripts": { "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}", - "build": "next build", + "build": "next build && node scripts/postbuild.js", "start": "next start", + "prepublishOnly": "npm run build", "lint": "next lint", "format": "prettier --write \"./**/*.{ts,tsx}\"", "format:check": "prettier --check \"./**/*.{ts,tsx}\"", @@ -25,7 +45,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", diff --git a/src/llama_stack_ui/scripts/postbuild.js b/src/llama_stack_ui/scripts/postbuild.js new file mode 100644 index 000000000..4b4dbdf5d --- /dev/null +++ b/src/llama_stack_ui/scripts/postbuild.js @@ -0,0 +1,40 @@ +const fs = require('fs'); +const path = require('path'); + +// Copy public directory to standalone +const publicSrc = path.join(__dirname, '..', 'public'); +const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public'); + +if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) { + console.log('Copying public directory to standalone...'); + copyDir(publicSrc, publicDest); +} + +// Copy .next/static to standalone +const staticSrc = path.join(__dirname, '..', '.next', 'static'); +const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static'); + +if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) { + console.log('Copying .next/static to standalone...'); + copyDir(staticSrc, staticDest); +} + +function copyDir(src, dest) { + if (!fs.existsSync(dest)) { + fs.mkdirSync(dest, { recursive: true }); + } + + const files = fs.readdirSync(src); + files.forEach((file) => { + const srcFile = path.join(src, file); + const destFile = path.join(dest, file); + + if (fs.statSync(srcFile).isDirectory()) { + copyDir(srcFile, destFile); + } else { + fs.copyFileSync(srcFile, destFile); + } + }); +} + +console.log('Postbuild 
complete!'); From 6ca2a67a9f1bfec5c4e520b6d82407d4d8ecd914 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 12 Nov 2025 04:09:14 -0500 Subject: [PATCH 10/62] chore: remove dead code (#4125) # What does this PR do? build_image is not used because `llama stack build` is gone. Remove it. Signed-off-by: Charlie Doern --- src/llama_stack/core/build.py | 65 ----------------------------------- 1 file changed, 65 deletions(-) diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 2ceb9e9be..fb3a22109 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import importlib.resources import sys from pydantic import BaseModel @@ -12,9 +11,6 @@ from termcolor import cprint from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.utils.exec import run_command -from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api @@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig): for special_dep in special_deps: cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) print() - - -def build_image( - build_config: BuildConfig, - image_name: str, - distro_or_config: str, - run_config: str | None = None, -): - container_base = build_config.distribution_spec.container_image or "python:3.12-slim" - - normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config) - normal_deps += SERVER_DEPENDENCIES - if build_config.external_apis_dir: - external_apis = load_external_apis(build_config) - if external_apis: - for _, api_spec in external_apis.items(): - normal_deps.extend(api_spec.pip_packages) - - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - script = str(importlib.resources.files("llama_stack") / "core/build_container.sh") - args = [ - script, - "--distro-or-config", - distro_or_config, - "--image-name", - image_name, - "--container-base", - container_base, - "--normal-deps", - " ".join(normal_deps), - ] - # When building from a config file (not a template), include the run config path in the - # build arguments - if run_config is not None: - args.extend(["--run-config", run_config]) - else: - script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh") - args = [ - script, - "--env-name", - str(image_name), - "--normal-deps", - " ".join(normal_deps), - ] - - # Always pass both arguments, even if empty, to maintain consistent positional arguments - if special_deps: - args.extend(["--optional-deps", "#".join(special_deps)]) - if external_provider_deps: - args.extend( - ["--external-provider-deps", "#".join(external_provider_deps)] - ) # the script will install external provider module, get its deps, and install those too. 
- - return_code = run_command(args) - - if return_code != 0: - log.error( - f"Failed to build target {image_name} with return code {return_code}", - ) - - return return_code From 539b9c08f38269a80aa5f79cc348b5a2a6032ba3 Mon Sep 17 00:00:00 2001 From: Akshay Ghodake Date: Wed, 12 Nov 2025 14:54:19 +0530 Subject: [PATCH 11/62] chore(deps): update pypdf to fix DoS vulnerabilities (#4121) Update pypdf dependency to address vulnerabilities causing potential denial of service through infinite loops or excessive memory usage when handling malicious PDFs. The update remains fully backward compatible, with no changes to the PdfReader API. # What does this PR do? Fixes #4120 ## Test Plan Co-authored-by: Francisco Arceo --- pyproject.toml | 4 ++-- uv.lock | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 653c6d613..e6808af8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ unit = [ "aiosqlite", "aiohttp", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "chardet", "sqlalchemy", @@ -135,7 +135,7 @@ test = [ "torchvision>=0.21.0", "chardet", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "datasets>=4.0.0", "autoevals", diff --git a/uv.lock b/uv.lock index ba9a862a3..f1808f005 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -2166,7 +2166,7 @@ test = [ { name = "milvus-lite", specifier = ">=2.5.0" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pymilvus", specifier = ">=2.6.1" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "qdrant-client" }, { name = "requests" }, { name = "sqlalchemy" }, @@ -2219,7 +2219,7 @@ unit = [ { name = "moto", extras = ["s3"], specifier = ">=5.1.10" }, { name = "ollama" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "sqlite-vec" }, @@ -3973,11 +3973,11 @@ wheels = [ [[package]] name = "pypdf" -version = "5.9.0" +version = "6.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/3a/584b97a228950ed85aec97c811c68473d9b8d149e6a8c155668287cf1a28/pypdf-5.9.0.tar.gz", hash = "sha256:30f67a614d558e495e1fbb157ba58c1de91ffc1718f5e0dfeb82a029233890a1", size = 5035118, upload-time = "2025-07-27T14:04:52.364Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/d9/6cff57c80a6963e7dd183bf09e9f21604a77716644b1e580e97b259f7612/pypdf-5.9.0-py3-none-any.whl", hash = "sha256:be10a4c54202f46d9daceaa8788be07aa8cd5ea8c25c529c50dd509206382c35", size = 313193, upload-time = "2025-07-27T14:04:50.53Z" }, + { url = "https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = 
"sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" }, ] [[package]] From 63137f9af1fde09eee62a0b28798297a9166c42e Mon Sep 17 00:00:00 2001 From: Sam El-Borai Date: Wed, 12 Nov 2025 17:39:21 +0100 Subject: [PATCH 12/62] chore(stainless): add config for file header (#4126) # What does this PR do? This PR adds Stainless config to specify the Meta copyright file header for generated files. Doing it via config instead of custom code will reduce the probability of git conflict. ## Test Plan - review preview builds --- client-sdks/stainless/config.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml index ab9342c49..c61b53654 100644 --- a/client-sdks/stainless/config.yml +++ b/client-sdks/stainless/config.yml @@ -463,6 +463,12 @@ resources: settings: license: MIT unwrap_response_fields: [data] + file_header: | + Copyright (c) Meta Platforms, Inc. and affiliates. + All rights reserved. + + This source code is licensed under the terms described in the LICENSE file in + the root directory of this source tree. openapi: transformations: From 37853ca5581a832ef7db9a130b2064ae705bcce3 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 12 Nov 2025 12:17:13 -0500 Subject: [PATCH 13/62] fix(tests): add OpenAI client connection cleanup to prevent CI hangs (#4119) # What does this PR do? Add explicit connection cleanup and shorter timeouts to OpenAI client fixtures. Fixes CI deadlock after 25+ tests due to connection pool exhaustion. Also adds 60s timeout to test_conversation_context_loading as safety net. ## Test Plan tests pass Signed-off-by: Charlie Doern --- tests/integration/fixtures/common.py | 8 +++++++- tests/integration/responses/fixtures/fixtures.py | 10 +++++++++- .../responses/test_conversation_responses.py | 8 +++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index d5e4c15f7..407564c15 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -323,7 +323,13 @@ def require_server(llama_stack_client): @pytest.fixture(scope="session") def openai_client(llama_stack_client, require_server): base_url = f"{llama_stack_client.base_url}/v1" - return OpenAI(base_url=base_url, api_key="fake") + client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0) + yield client + # Cleanup: close HTTP connections + try: + client.close() + except Exception: + pass @pytest.fixture(params=["openai_client", "client_with_models"]) diff --git a/tests/integration/responses/fixtures/fixtures.py b/tests/integration/responses/fixtures/fixtures.py index dbf67e138..b06117b98 100644 --- a/tests/integration/responses/fixtures/fixtures.py +++ b/tests/integration/responses/fixtures/fixtures.py @@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider): client = LlamaStackAsLibraryClient(config, skip_logger_removal=True) return client - return OpenAI( + client = OpenAI( base_url=base_url, api_key=api_key, + max_retries=0, + timeout=30.0, ) + yield client + # Cleanup: close HTTP connections + try: + client.close() + except Exception: + pass diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index ef7ea7c4e..babb77793 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ 
b/tests/integration/responses/test_conversation_responses.py
@@ -65,8 +65,14 @@ class TestConversationResponses:
         conversation_items = openai_client.conversations.items.list(conversation.id)
         assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages
 
+    @pytest.mark.timeout(60, method="thread")
     def test_conversation_context_loading(self, openai_client, text_model_id):
-        """Test that conversation context is properly loaded for responses."""
+        """Test that conversation context is properly loaded for responses.
+
+        Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
+        after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
+        Investigation needed: connection pool exhaustion or event loop state issue.
+        """
         conversation = openai_client.conversations.create(
             items=[
                 {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},

From eb3f9ac2781d0079eb65ea14b77296fcd3d317d4 Mon Sep 17 00:00:00 2001
From: Francisco Arceo
Date: Wed, 12 Nov 2025 12:59:48 -0500
Subject: [PATCH 14/62] feat: allow returning embeddings and metadata from `/vector_stores/` methods; disallow changing Provider ID (#4046)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

- Updates `/vector_stores/{vector_store_id}/files/{file_id}/content` to allow returning `embeddings` and `metadata` using the `extra_query`
- Updates the UI accordingly to display them.
- Updates the UI to support CRUD operations in the Vector Stores section and adds a new modal exposing the functionality.
- Updates Vector Store update to fail if a user tries to change the Provider ID (which doesn't make sense to allow)

```python
In [1]: client.vector_stores.files.content(
            vector_store_id=vector_store.id,
            file_id=file.id,
            extra_query={"include_embeddings": True, "include_metadata": True},
        )

Out[1]: FileContentResponse(attributes={}, content=[Content(text='This is a test document to check if embeddings are generated properly.\n', type='text', embedding=[0.33760684728622437, ...,], chunk_metadata={'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'source': None, 'created_timestamp': 1762053437, 'updated_timestamp': 1762053437, 'chunk_window': '0-13', 'chunk_tokenizer': 'DEFAULT_TIKTOKEN_TOKENIZER', 'chunk_embedding_model': 'sentence-transformers/nomic-ai/nomic-embed-text-v1.5', 'chunk_embedding_dimension': 768, 'content_token_count': 13, 'metadata_token_count': 9}, metadata={'filename': 'test-embedding.txt', 'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'token_count': 13, 'metadata_token_count': 9})], file_id='file-27291dbc679642ac94ffac6d2810c339', filename='test-embedding.txt')
```

Screenshots of the UI are displayed below:

### List Vector Store with Added "Create New Vector Store"

### Create New Vector Store

### Edit Vector Store

### Vector Store Files Contents page (with Embeddings)

### Vector Store Files Contents Details page (with Embeddings)

## Test Plan

Tests added for Middleware extension and Provider failures.

---------

Signed-off-by: Francisco Javier Arceo
---
 client-sdks/stainless/openapi.yml             |  40 +-
 docs/static/llama-stack-spec.yaml             |  40 +-
 docs/static/stainless-llama-stack-spec.yaml   |  40 +-
 src/llama_stack/apis/vector_io/vector_io.py   |  46 ++-
 src/llama_stack/core/library_client.py        |   6 +
 src/llama_stack/core/routers/vector_io.py     |  20 +-
 .../core/routing_tables/vector_stores.py      |   5 +
 .../utils/memory/openai_vector_store_mixin.py |  64 +--
 .../app/logs/vector-stores/page.tsx           | 386 +++++++++++++++---
 .../components/prompts/prompt-editor.test.tsx |   2 +-
 .../vector-store-detail.test.tsx              |  14 +
 .../vector-stores/vector-store-detail.tsx     | 183 ++++++++-
 .../vector-stores/vector-store-editor.tsx     | 235 +++++++++++
 src/llama_stack_ui/lib/contents-api.ts        |  40 +-
 .../vector_io/test_openai_vector_stores.py    |  95 +++++
 tests/unit/core/routers/test_vector_io.py     |  62 +++
 tests/unit/server/test_sse.py                 |   8 +-
 17 files changed, 1161 insertions(+), 125 deletions(-)
 create mode 100644 src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx

diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 9f3ef15b5..1be4af6c9 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2691,7 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
content: application/json: schema: @@ -2723,6 +2724,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -9375,6 +9390,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. + bool: + type: boolean VectorStoreContent: type: object properties: @@ -9386,6 +9403,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -9409,6 +9446,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f3ef15b5..1be4af6c9 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2691,7 +2691,8 @@ paths: responses: '200': description: >- - A VectorStoreFileContentResponse representing the file contents. + File contents, optionally with embeddings and metadata based on query + parameters. content: application/json: schema: @@ -2726,6 +2727,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -10091,6 +10106,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. 
+ bool: + type: boolean VectorStoreContent: type: object properties: @@ -10102,6 +10119,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -10125,6 +10162,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 846c6f191..699241128 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -10,7 +10,7 @@ # the root directory of this source tree. from typing import Annotated, Any, Literal, Protocol, runtime_checkable -from fastapi import Body +from fastapi import Body, Query from pydantic import BaseModel, Field from llama_stack.apis.common.tracing import telemetry_traceable @@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel): :param type: Content type, currently only "text" is supported :param text: The actual text content + :param embedding: Optional embedding vector for this content chunk + :param chunk_metadata: Optional chunk metadata + :param metadata: Optional user-defined metadata """ type: Literal["text"] text: str + embedding: list[float] | None = None + chunk_metadata: ChunkMetadata | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel): deleted: bool = True +@json_schema_type +class VectorStoreFileContentResponse(BaseModel): + """Represents the parsed content of a vector store file. + + :param object: The object type, which is always `vector_store.file_content.page` + :param data: Parsed content of the file + :param has_more: Indicates if there are more content pages to fetch + :param next_page: The token for the next page, if any + """ + + object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" + data: list[VectorStoreContent] + has_more: bool = False + next_page: str | None = None + + @json_schema_type class VectorStoreChunkingStrategyAuto(BaseModel): """Automatic chunking strategy for vector store files. @@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel): has_more: bool = False -@json_schema_type -class VectorStoreFileContentResponse(BaseModel): - """Represents the parsed content of a vector store file. - - :param object: The object type, which is always `vector_store.file_content.page` - :param data: Parsed content of the file - :param has_more: Indicates if there are more content pages to fetch - :param next_page: The token for the next page, if any - """ - - object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" - data: list[VectorStoreContent] - has_more: bool - next_page: str | None = None - - @json_schema_type class VectorStoreFileDeleteResponse(BaseModel): """Response from deleting a vector store file. 
@@ -732,12 +738,16 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, + include_embeddings: Annotated[bool | None, Query(default=False)] = False, + include_metadata: Annotated[bool | None, Query(default=False)] = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. :param vector_store_id: The ID of the vector store containing the file to retrieve. :param file_id: The ID of the file to retrieve. - :returns: A VectorStoreFileContentResponse representing the file contents. + :param include_embeddings: Whether to include embedding vectors in the response. + :param include_metadata: Whether to include chunk metadata in the response. + :returns: File contents, optionally with embeddings and metadata based on query parameters. """ ... diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index b8f9f715f..db990368b 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls) body |= path_params + # Pass through params that aren't already handled as path params + if options.params: + extra_query_params = {k: v for k, v in options.params.items() if k not in path_params} + if extra_query_params: + body["extra_query"] = extra_query_params + body, field_names = self._handle_file_uploads(options, body) body = self._convert_body(matched_func, body, exclude_params=set(field_names)) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 9dac461db..ed5fb8253 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -247,6 +247,13 @@ class VectorIORouter(VectorIO): metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") + + # Check if provider_id is being changed (not supported) + if metadata and "provider_id" in metadata: + current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id) + if current_store and current_store.provider_id != metadata["provider_id"]: + raise ValueError("provider_id cannot be changed after vector store creation") + provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -338,12 +345,19 @@ class VectorIORouter(VectorIO): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_contents( + logger.debug( + f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, " + f"include_embeddings={include_embeddings}, include_metadata={include_metadata}" + ) + + return await self.routing_table.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/core/routing_tables/vector_stores.py 
b/src/llama_stack/core/routing_tables/vector_stores.py index f95a4dbe3..e77739abe 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 86e6ea013..853245598 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC): # Unknown filter type, default to no match raise ValueError(f"Unsupported filter type: {filter_type}") - def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]: - # content is InterleavedContent + def _chunk_to_vector_store_content( + self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False + ) -> list[VectorStoreContent]: + def extract_fields() -> dict: + """Extract embedding and metadata fields from chunk based on include flags.""" + return { + "embedding": chunk.embedding if include_embeddings else None, + "chunk_metadata": chunk.chunk_metadata if include_metadata else None, + "metadata": chunk.metadata if include_metadata else None, + } + + fields = extract_fields() + if isinstance(chunk.content, str): - content = [ - VectorStoreContent( - type="text", - text=chunk.content, - ) - ] + content_item = VectorStoreContent(type="text", text=chunk.content, **fields) + content = [content_item] elif isinstance(chunk.content, list): # TODO: Add support for other types of content - content = [ - VectorStoreContent( - type="text", - text=item.text, - ) - for item in chunk.content - if item.type == "text" - ] + content = [] + for item in chunk.content: + if item.type == "text": + content_item = VectorStoreContent(type="text", text=item.text, **fields) + content.append(content_item) else: if chunk.content.type != "text": raise ValueError(f"Unsupported content type: {chunk.content.type}") - content = [ - VectorStoreContent( - type="text", - text=chunk.content.text, - ) - ] + + content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields) + content = [content_item] return content async def openai_attach_file_to_vector_store( @@ -820,13 +821,12 @@ class OpenAIVectorStoreMixin(ABC): message=str(e), ) - # Create OpenAI vector store file metadata + # Save vector store file to persistent storage AFTER insert_chunks + # so that chunks include the embeddings that were generated file_info = vector_store_file_object.model_dump(exclude={"last_error"}) file_info["filename"] = file_response.filename if file_response else "" - # Save vector store file to persistent storage (provider-specific) dict_chunks = [c.model_dump() for c in chunks] - # This should be updated to include chunk_id await self._save_openai_vector_store_file(vector_store_id, file_id, 
file_info, dict_chunks) # Update file_ids and file_counts in vector store metadata @@ -921,21 +921,27 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) + # Parameters are already provided directly + # include_embeddings and include_metadata are now function parameters + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: - content.extend(self._chunk_to_vector_store_content(chunk)) + content.extend( + self._chunk_to_vector_store_content( + chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False + ) + ) return VectorStoreFileContentResponse( - object="vector_store.file_content.page", data=content, - has_more=False, - next_page=None, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack_ui/app/logs/vector-stores/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/page.tsx index 72196d496..84680e01a 100644 --- a/src/llama_stack_ui/app/logs/vector-stores/page.tsx +++ b/src/llama_stack_ui/app/logs/vector-stores/page.tsx @@ -8,6 +8,9 @@ import type { import { useRouter } from "next/navigation"; import { usePagination } from "@/hooks/use-pagination"; import { Button } from "@/components/ui/button"; +import { Plus, Trash2, Search, Edit, X } from "lucide-react"; +import { useState } from "react"; +import { Input } from "@/components/ui/input"; import { Table, TableBody, @@ -17,9 +20,21 @@ import { TableRow, } from "@/components/ui/table"; import { Skeleton } from "@/components/ui/skeleton"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { + VectorStoreEditor, + VectorStoreFormData, +} from "@/components/vector-stores/vector-store-editor"; export default function VectorStoresPage() { const router = useRouter(); + const client = useAuthClient(); + const [deletingStores, setDeletingStores] = useState>(new Set()); + const [searchTerm, setSearchTerm] = useState(""); + const [showVectorStoreModal, setShowVectorStoreModal] = useState(false); + const [editingStore, setEditingStore] = useState(null); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); const { data: stores, status, @@ -47,6 +62,142 @@ export default function VectorStoresPage() { } }, [status, hasMore, loadMore]); + // Handle ESC key to close modal + React.useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showVectorStoreModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showVectorStoreModal]); + + const handleDeleteVectorStore = async (storeId: string) => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setDeletingStores(prev => new Set([...prev, storeId])); + + try { + await client.vectorStores.delete(storeId); + // Reload the data to reflect the deletion + window.location.reload(); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? 
err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setDeletingStores(prev => { + const newSet = new Set(prev); + newSet.delete(storeId); + return newSet; + }); + } + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + if (editingStore) { + // Update existing vector store + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== editingStore.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(editingStore.id, updateParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + return; + } + + const createParams: { + name?: string; + provider_id?: string; + extra_body?: Record; + } = { + name: formData.name || undefined, + }; + + // Extract provider_id to top-level (like Python client does) + if (formData.provider_id) { + createParams.provider_id = formData.provider_id; + } + + // Add remaining parameters to extra_body + const extraBody: Record = {}; + if (formData.provider_id) { + extraBody.provider_id = formData.provider_id; + } + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + createParams.extra_body = extraBody; + } + + await client.vectorStores.create(createParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store created successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to create vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to create vector store"; + setModalError(errorMessage); + } + }; + + const handleEditVectorStore = (store: VectorStore) => { + setEditingStore(store); + setShowVectorStoreModal(true); + setModalError(null); + }; + + const handleCancel = () => { + setShowVectorStoreModal(false); + setEditingStore(null); + setModalError(null); + setShowSuccessState(false); + }; + const renderContent = () => { if (status === "loading") { return ( @@ -66,73 +217,190 @@ export default function VectorStoresPage() { return

No vector stores found.

; } - return ( -
- - - - ID - Name - Created - Completed - Cancelled - Failed - In Progress - Total - Usage Bytes - Provider ID - Provider Vector DB ID - - - - {stores.map(store => { - const fileCounts = store.file_counts; - const metadata = store.metadata || {}; - const providerId = metadata.provider_id ?? ""; - const providerDbId = metadata.provider_vector_db_id ?? ""; + // Filter stores based on search term + const filteredStores = stores.filter(store => { + if (!searchTerm) return true; - return ( - router.push(`/logs/vector-stores/${store.id}`)} - className="cursor-pointer hover:bg-muted/50" - > - - - - {store.name} - - {new Date(store.created_at * 1000).toLocaleString()} - - {fileCounts.completed} - {fileCounts.cancelled} - {fileCounts.failed} - {fileCounts.in_progress} - {fileCounts.total} - {store.usage_bytes} - {providerId} - {providerDbId} - - ); - })} - -
+ const searchLower = searchTerm.toLowerCase(); + return ( + store.id.toLowerCase().includes(searchLower) || + (store.name && store.name.toLowerCase().includes(searchLower)) || + (store.metadata?.provider_id && + String(store.metadata.provider_id) + .toLowerCase() + .includes(searchLower)) || + (store.metadata?.provider_vector_db_id && + String(store.metadata.provider_vector_db_id) + .toLowerCase() + .includes(searchLower)) + ); + }); + + return ( +
+ {/* Search Bar */} +
+ + setSearchTerm(e.target.value)} + className="pl-10" + /> +
+ +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + Actions + + + + {filteredStores.map(store => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + + router.push(`/logs/vector-stores/${store.id}`) + } + className="cursor-pointer hover:bg-muted/50" + > + + + + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + +
+ + +
+
+
+ ); + })} +
+
+
); }; return (
-

Vector Stores

+
+

Vector Stores

+ +
{renderContent()} + + {/* Create Vector Store Modal */} + {showVectorStoreModal && ( +
+
+
+

+ {editingStore ? "Edit Vector Store" : "Create New Vector Store"} +

+ +
+
+ +
+
+
+ )}
); } diff --git a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx index 458a5f942..70e0e4e66 100644 --- a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx +++ b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx @@ -2,7 +2,7 @@ import React from "react"; import { render, screen, fireEvent } from "@testing-library/react"; import "@testing-library/jest-dom"; import { PromptEditor } from "./prompt-editor"; -import type { Prompt, PromptFormData } from "./types"; +import type { Prompt } from "./types"; describe("PromptEditor", () => { const mockOnSave = jest.fn(); diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx index 08f90ac0d..78bec8147 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx @@ -12,6 +12,20 @@ jest.mock("next/navigation", () => ({ }), })); +// Mock NextAuth +jest.mock("next-auth/react", () => ({ + useSession: () => ({ + data: { + accessToken: "mock-access-token", + user: { + id: "mock-user-id", + email: "test@example.com", + }, + }, + status: "authenticated", + }), +})); + describe("VectorStoreDetailView", () => { const defaultProps = { store: null, diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx index d3d0fa249..f5b6281e7 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx @@ -1,16 +1,18 @@ "use client"; import { useRouter } from "next/navigation"; +import { useState, useEffect } from "react"; import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Skeleton } from "@/components/ui/skeleton"; import { Button } from "@/components/ui/button"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { Edit2, Trash2, X } from "lucide-react"; import { DetailLoadingView, DetailErrorView, DetailNotFoundView, - DetailLayout, PropertiesCard, PropertyItem, } from "@/components/layout/detail-layout"; @@ -23,6 +25,7 @@ import { TableHeader, TableRow, } from "@/components/ui/table"; +import { VectorStoreEditor, VectorStoreFormData } from "./vector-store-editor"; interface VectorStoreDetailViewProps { store: VectorStore | null; @@ -43,21 +46,122 @@ export function VectorStoreDetailView({ errorFiles, id, }: VectorStoreDetailViewProps) { - const title = "Vector Store Details"; const router = useRouter(); + const client = useAuthClient(); + const [isDeleting, setIsDeleting] = useState(false); + const [showEditModal, setShowEditModal] = useState(false); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); + + // Handle ESC key to close modal + useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showEditModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showEditModal]); const handleFileClick = (fileId: string) => { 
router.push(`/logs/vector-stores/${id}/files/${fileId}`); }; + const handleEditVectorStore = () => { + setShowEditModal(true); + setModalError(null); + setShowSuccessState(false); + }; + + const handleCancel = () => { + setShowEditModal(false); + setModalError(null); + setShowSuccessState(false); + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + // Update existing vector store (same logic as list page) + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== store?.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(id, updateParams); + + // Show success state + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to update vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to update vector store"; + setModalError(errorMessage); + } + }; + + const handleDeleteVectorStore = async () => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setIsDeleting(true); + + try { + await client.vectorStores.delete(id); + // Redirect to the vector stores list after successful deletion + router.push("/logs/vector-stores"); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setIsDeleting(false); + } + }; + if (errorStore) { - return ; + return ( + + ); } if (isLoadingStore) { - return ; + return ; } if (!store) { - return ; + return ; } const mainContent = ( @@ -138,6 +242,73 @@ export function VectorStoreDetailView({ ); return ( - + <> +
+

Vector Store Details

+
+ + +
+
+
+
{mainContent}
+
{sidebar}
+
+ + {/* Edit Vector Store Modal */} + {showEditModal && ( +
+
+
+

Edit Vector Store

+ +
+
+ +
+
+
+ )} + ); } diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx new file mode 100644 index 000000000..719a2a9fd --- /dev/null +++ b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx @@ -0,0 +1,235 @@ +"use client"; + +import { useState, useEffect } from "react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Card, CardContent } from "@/components/ui/card"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { Model } from "llama-stack-client/resources/models"; + +export interface VectorStoreFormData { + name: string; + embedding_model?: string; + embedding_dimension?: number; + provider_id?: string; +} + +interface VectorStoreEditorProps { + onSave: (formData: VectorStoreFormData) => Promise; + onCancel: () => void; + error?: string | null; + initialData?: VectorStoreFormData; + showSuccessState?: boolean; + isEditing?: boolean; +} + +export function VectorStoreEditor({ + onSave, + onCancel, + error, + initialData, + showSuccessState, + isEditing = false, +}: VectorStoreEditorProps) { + const client = useAuthClient(); + const [formData, setFormData] = useState( + initialData || { + name: "", + embedding_model: "", + embedding_dimension: 768, + provider_id: "", + } + ); + const [loading, setLoading] = useState(false); + const [models, setModels] = useState([]); + const [modelsLoading, setModelsLoading] = useState(true); + const [modelsError, setModelsError] = useState(null); + + const embeddingModels = models.filter( + model => model.custom_metadata?.model_type === "embedding" + ); + + useEffect(() => { + const fetchModels = async () => { + try { + setModelsLoading(true); + setModelsError(null); + const modelList = await client.models.list(); + setModels(modelList); + + // Set default embedding model if available + const embeddingModelsList = modelList.filter(model => { + return model.custom_metadata?.model_type === "embedding"; + }); + if (embeddingModelsList.length > 0 && !formData.embedding_model) { + setFormData(prev => ({ + ...prev, + embedding_model: embeddingModelsList[0].id, + })); + } + } catch (err) { + console.error("Failed to load models:", err); + setModelsError( + err instanceof Error ? err.message : "Failed to load models" + ); + } finally { + setModelsLoading(false); + } + }; + + fetchModels(); + }, [client]); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setLoading(true); + + try { + await onSave(formData); + } finally { + setLoading(false); + } + }; + + return ( + + +
+
+ + setFormData({ ...formData, name: e.target.value })} + placeholder="Enter vector store name" + required + /> +
+ +
+ + {modelsLoading ? ( +
+ Loading models... ({models.length} loaded) +
+ ) : modelsError ? ( +
+ Error: {modelsError} +
+ ) : embeddingModels.length === 0 ? ( +
+ No embedding models available ({models.length} total models) +
+ ) : ( + + )} + {formData.embedding_model && ( +

+ Dimension:{" "} + {embeddingModels.find(m => m.id === formData.embedding_model) + ?.custom_metadata?.embedding_dimension || "Unknown"} +

+ )} +
+ +
+ + + setFormData({ + ...formData, + embedding_dimension: parseInt(e.target.value) || 768, + }) + } + placeholder="768" + /> +
+ +
+ + + setFormData({ ...formData, provider_id: e.target.value }) + } + placeholder="e.g., faiss, chroma, sqlite" + disabled={isEditing} + /> + {isEditing && ( +

+ Provider ID cannot be changed after vector store creation +

+ )} +
+ + {error && ( +
+ {error} +
+ )} + +
+ {showSuccessState ? ( + + ) : ( + <> + + + + )} +
+
+
+
+ ); +} diff --git a/src/llama_stack_ui/lib/contents-api.ts b/src/llama_stack_ui/lib/contents-api.ts index f4920f3db..35456faff 100644 --- a/src/llama_stack_ui/lib/contents-api.ts +++ b/src/llama_stack_ui/lib/contents-api.ts @@ -34,9 +34,35 @@ export class ContentsAPI { async getFileContents( vectorStoreId: string, - fileId: string + fileId: string, + includeEmbeddings: boolean = true, + includeMetadata: boolean = true ): Promise { - return this.client.vectorStores.files.content(vectorStoreId, fileId); + try { + // Use query parameters to pass embeddings and metadata flags (OpenAI-compatible pattern) + const extraQuery: Record = {}; + if (includeEmbeddings) { + extraQuery.include_embeddings = true; + } + if (includeMetadata) { + extraQuery.include_metadata = true; + } + + const result = await this.client.vectorStores.files.content( + vectorStoreId, + fileId, + { + query: { + include_embeddings: includeEmbeddings, + include_metadata: includeMetadata, + }, + } + ); + return result; + } catch (error) { + console.error("ContentsAPI.getFileContents error:", error); + throw error; + } } async getContent( @@ -70,11 +96,15 @@ export class ContentsAPI { order?: string; after?: string; before?: string; + includeEmbeddings?: boolean; + includeMetadata?: boolean; } ): Promise { - const fileContents = await this.client.vectorStores.files.content( + const fileContents = await this.getFileContents( vectorStoreId, - fileId + fileId, + options?.includeEmbeddings ?? true, + options?.includeMetadata ?? true ); const contentItems: VectorStoreContentItem[] = []; @@ -82,7 +112,7 @@ export class ContentsAPI { const rawContent = content as Record; // Extract actual fields from the API response - const embedding = rawContent.embedding || undefined; + const embedding = rawContent.embedding as number[] | undefined; const created_timestamp = rawContent.created_timestamp || rawContent.created_at || diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 20f9d2978..1043d4903 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -11,6 +11,7 @@ import pytest from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError +from llama_stack.apis.files import ExpiresAfter from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger @@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata( assert "metadata_config_store" in store_names assert "consistent_config_store" in store_names + + +@vector_provider_wrapper +def test_openai_vector_store_file_contents_with_extra_query( + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id +): + """Test that vector store file contents endpoint supports extra_query parameter.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + compat_client = compat_client_with_empty_stores + + # Create a vector store + vector_store = compat_client.vector_stores.create( + name="test_extra_query_store", + extra_body={ + "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, + }, + ) + + # Create and attach a file + test_content = b"This is test content for extra_query validation." 
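+    # The buffer's .name attribute is what the Files API records as the filename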
+ with BytesIO(test_content) as file_buffer: + file_buffer.name = "test_extra_query.txt" + file = compat_client.files.create( + file=file_buffer, + purpose="assistants", + expires_after=ExpiresAfter(anchor="created_at", seconds=86400), + ) + + file_attach_response = compat_client.vector_stores.files.create( + vector_store_id=vector_store.id, + file_id=file.id, + extra_body={"embedding_model": embedding_model_id}, + ) + assert file_attach_response.status == "completed" + + # Wait for processing + time.sleep(2) + + # Test that extra_query parameter is accepted and processed + content_with_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + extra_query={"include_embeddings": True, "include_metadata": True}, + ) + + # Test without extra_query for comparison + content_without_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + ) + + # Validate that both calls succeed + assert content_with_extra_query is not None + assert content_without_extra_query is not None + assert len(content_with_extra_query.data) > 0 + assert len(content_without_extra_query.data) > 0 + + # Validate that extra_query parameter is processed correctly + # Both should have the embedding/metadata fields available (may be None based on flags) + first_chunk_with_flags = content_with_extra_query.data[0] + first_chunk_without_flags = content_without_extra_query.data[0] + + # The key validation: extra_query fields are present in the response + # Handle both dict and object responses (different clients may return different formats) + def has_field(obj, field): + if isinstance(obj, dict): + return field in obj + else: + return hasattr(obj, field) + + # Validate that all expected fields are present in both responses + expected_fields = ["embedding", "chunk_metadata", "metadata", "text"] + for field in expected_fields: + assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query" + assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query" + + # Validate content is the same + def get_field(obj, field): + if isinstance(obj, dict): + return obj[field] + else: + return getattr(obj, field) + + assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8") + assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8") + + with_flags_embedding = get_field(first_chunk_with_flags, "embedding") + without_flags_embedding = get_field(first_chunk_without_flags, "embedding") + + # Validate that embeddings are included when requested and excluded when not requested + assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True" + assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list" + assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False" diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index dd3246cb3..f9bd84a37 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -55,3 +55,65 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error with pytest.raises(ValueError, match="Multiple vector_io providers available"): await router.openai_create_vector_store(request) + + +async def test_update_vector_store_provider_id_change_fails(): + """Test that updating 
a vector store with a different provider_id fails with clear error.""" + mock_routing_table = Mock() + + # Mock an existing vector store with provider_id "faiss" + mock_existing_store = Mock() + mock_existing_store.provider_id = "inline::faiss" + mock_existing_store.identifier = "vs_123" + + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store) + mock_routing_table.get_provider_impl = AsyncMock( + return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123"))) + ) + + router = VectorIORouter(mock_routing_table) + + # Try to update with different provider_id in metadata - this should fail + with pytest.raises(ValueError, match="provider_id cannot be changed after vector store creation"): + await router.openai_update_vector_store( + vector_store_id="vs_123", + name="updated_name", + metadata={"provider_id": "inline::sqlite"}, # Different provider_id + ) + + # Verify the existing store was looked up to check provider_id + mock_routing_table.get_object_by_identifier.assert_called_once_with("vector_store", "vs_123") + + # Provider should not be called since validation failed + mock_routing_table.get_provider_impl.assert_not_called() + + +async def test_update_vector_store_same_provider_id_succeeds(): + """Test that updating a vector store with the same provider_id succeeds.""" + mock_routing_table = Mock() + + # Mock an existing vector store with provider_id "faiss" + mock_existing_store = Mock() + mock_existing_store.provider_id = "inline::faiss" + mock_existing_store.identifier = "vs_123" + + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store) + mock_routing_table.get_provider_impl = AsyncMock( + return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123"))) + ) + + router = VectorIORouter(mock_routing_table) + + # Update with same provider_id should succeed + await router.openai_update_vector_store( + vector_store_id="vs_123", + name="updated_name", + metadata={"provider_id": "inline::faiss"}, # Same provider_id + ) + + # Verify the provider update method was called + mock_routing_table.get_provider_impl.assert_called_once_with("vs_123") + provider = await mock_routing_table.get_provider_impl("vs_123") + provider.openai_update_vector_store.assert_called_once_with( + vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"} + ) diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index f36c8c181..0303a6ded 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -104,12 +104,18 @@ async def test_paginated_response_url_setting(): route_handler = create_dynamic_typed_route(mock_api_method, "get", "/test/route") - # Mock minimal request + # Mock minimal request with proper state object request = MagicMock() request.scope = {"user_attributes": {}, "principal": ""} request.headers = {} request.body = AsyncMock(return_value=b"") + # Create a simple state object without auto-generating attributes + class MockState: + pass + + request.state = MockState() + result = await route_handler(request) assert isinstance(result, PaginatedResponse) From 94e977c257f70296c73a913dd4218d9119558632 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Wed, 12 Nov 2025 13:04:56 -0500 Subject: [PATCH 15/62] fix(docs): link to test replay-record docs for discoverability (#4134) Help users find the comprehensive integration testing docs by linking to the record-replay documentation. 
This clarifies that the technical README complements the main docs.
---
 tests/integration/recordings/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/recordings/README.md b/tests/integration/recordings/README.md
index 621a07562..bdf4f532f 100644
--- a/tests/integration/recordings/README.md
+++ b/tests/integration/recordings/README.md
@@ -2,6 +2,10 @@

 This directory contains recorded inference API responses used for deterministic testing without requiring live API access.

+For a guide to recording and replaying tests, see the
+[record-replay docs](https://llamastack.github.io/docs/contributing/testing/record-replay);
+this README covers the lower-level technical details of the recordings themselves.
+
 ## Structure

 - `responses/` - JSON files containing request/response pairs for inference operations

From 356f37b1bae1e98f23ed8f2dd224973b249827ec Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 12 Nov 2025 18:13:26 +0000
Subject: [PATCH 16/62] docs: clarify model identification uses
 provider_model_id not model_id (#4128)

Updated documentation to accurately reflect current behavior where
models are identified as provider_id/provider_model_id in the system.

Changes:
o Clarify that model_id is for configuration purposes only
o Explain models are accessed as provider_id/provider_model_id
o Remove outdated aliasing example that suggested model_id could be
used as a custom identifier

This corrects the documentation which previously suggested model_id
could be used to create friendly aliases, which is not how the code
actually works.

Signed-off-by: Derek Higgins
---
 docs/docs/distributions/configuration.mdx | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx
index ff50c406a..46ecfa475 100644
--- a/docs/docs/distributions/configuration.mdx
+++ b/docs/docs/distributions/configuration.mdx
@@ -221,7 +221,15 @@ models:
 ```
 A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.

-What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
+What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier.
+
+**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`.
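+
+As a hedged illustration (assuming a local server on port 8321 and the Ollama mapping shown in the examples below; any OpenAI-compatible client works the same way), the qualified identifier is what you pass as the `model` parameter:
+
+```python
+from openai import OpenAI
+
+# Llama Stack serves an OpenAI-compatible API under /v1; the api_key is a placeholder here
+client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
+
+response = client.chat.completions.create(
+    model="ollama/llama3.2",  # provider_id/provider_model_id, not the bare model_id
+    messages=[{"role": "user", "content": "hello"}],
+)
+print(response.choices[0].message.content)
+```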
+ +Examples: +- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null` + → Access as: `ollama/llama3.2` +- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b` + → Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier) If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: From 492f79ca9b2c4ac5c77346fc91ec8a9811dec342 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 12 Nov 2025 10:35:39 -0800 Subject: [PATCH 17/62] fix: harden storage semantics (#4118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes issues in the storage system by guaranteeing immediate durability for responses and ensuring background writers stay alive. Three related fixes: * Responses to the OpenAI-compatible API now write directly to Postgres/SQLite inside the request instead of detouring through an async queue that might never drain; this restores the expected read-after-write behavior and removes the "response not found" races reported by users. * The access-control shim was stamping owner_principal/access_attributes as SQL NULL, which Postgres interprets as non-public rows; fixing it to use the empty-string/JSON-null pattern means conversations and responses stored without an authenticated user stay queryable (matching SQLite). * The inference-store queue remains for batching, but its worker tasks now start lazily on the live event loop so server startup doesn't cancel them—writes keep flowing even when the stack is launched via llama stack run. Closes #4115 ### Test Plan Added a matrix entry to test our "base" suite against Postgres as the store. --- .../actions/setup-test-environment/action.yml | 26 ++ .github/workflows/integration-tests.yml | 12 +- .../distributions/ci-tests/ci_tests.py | 1 - .../ci-tests/run-with-postgres-store.yaml | 293 ++++++++++++++++++ .../starter-gpu/run-with-postgres-store.yaml | 58 ++-- .../starter/run-with-postgres-store.yaml | 58 ++-- .../distributions/starter/starter.py | 105 ++----- .../utils/inference/inference_store.py | 27 +- .../utils/responses/responses_store.py | 75 +---- .../utils/sqlstore/authorized_sqlstore.py | 34 +- tests/integration/ci_matrix.json | 1 + tests/integration/fixtures/common.py | 17 +- tests/integration/suites.py | 20 ++ 13 files changed, 516 insertions(+), 211 deletions(-) create mode 100644 src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 7b306fef5..1c9d019cc 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -39,6 +39,32 @@ runs: if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }} uses: ./.github/actions/setup-vllm + - name: Start Postgres service + if: ${{ contains(inputs.setup, 'postgres') }} + shell: bash + run: | + sudo docker rm -f postgres-ci || true + sudo docker run -d --name postgres-ci \ + -e POSTGRES_USER=llamastack \ + -e POSTGRES_PASSWORD=llamastack \ + -e POSTGRES_DB=llamastack \ + -p 5432:5432 \ + postgres:16 + + echo "Waiting for Postgres to become ready..." 
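+          # Poll pg_isready every 2 seconds, up to 30 attempts, before giving up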
+ for i in {1..30}; do + if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then + echo "Postgres is ready" + break + fi + if [ "$i" -eq 30 ]; then + echo "Postgres failed to start in time" + sudo docker logs postgres-ci || true + exit 1 + fi + sleep 2 + done + - name: Build Llama Stack shell: bash run: | diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2c797e906..71c7933b4 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -66,12 +66,12 @@ jobs: run-replay-mode-tests: needs: generate-matrix runs-on: ubuntu-latest - name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} + name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} strategy: fail-fast: false matrix: - client-type: [library, docker, server] + client: [library, docker, server] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} @@ -84,6 +84,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup test environment + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/setup-test-environment with: python-version: ${{ matrix.python-version }} @@ -93,11 +94,16 @@ jobs: inference-mode: 'replay' - name: Run tests + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/run-and-record-tests env: OPENAI_API_KEY: dummy with: - stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }} + stack-config: >- + ${{ matrix.config.stack_config + || (matrix.client == 'library' && 'ci-tests') + || (matrix.client == 'server' && 'server:ci-tests') + || 'docker:ci-tests' }} setup: ${{ matrix.config.setup }} inference-mode: 'replay' suite: ${{ matrix.config.suite }} diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index c06b1b98d..ab102f5f3 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -13,6 +13,5 @@ from ..starter.starter import get_distribution_template as get_starter_distribut def get_distribution_template() -> DistributionTemplate: template = get_starter_distribution_template(name="ci-tests") template.description = "CI tests for Llama Stack" - template.run_configs.pop("run-with-postgres-store.yaml", None) return template diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml new file mode 100644 index 000000000..5384b58fe --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -0,0 +1,293 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- batches +- datasetio +- eval +- files +- 
inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: 
${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: 
kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 1920ebd9d..e29ada6f4 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -237,10 +232,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -248,7 +243,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -258,27 +253,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + 
default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 702f95381..437674bf9 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -234,10 +229,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -245,7 +240,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -255,27 +250,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 88cd3a4fe..7b7773289 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,11 +17,6 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - SqlStoreReference, -) from 
llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.datatypes import RemoteProviderSpec @@ -154,10 +149,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::reference"), ], } + files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}") files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + config=files_config, ) embedding_provider = Provider( provider_id="sentence-transformers", @@ -187,7 +183,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: provider_shield_id="${env.CODE_SCANNER_MODEL:=}", ), ] - postgres_config = PostgresSqlStoreConfig.sample_run_config() + postgres_sql_config = PostgresSqlStoreConfig.sample_run_config() + postgres_kv_config = PostgresKVStoreConfig.sample_run_config() default_overrides = { "inference": remote_inference_providers + [embedding_provider], "vector_io": [ @@ -244,6 +241,33 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "files": [files_provider], } + base_run_settings = RunConfigSettings( + provider_overrides=default_overrides, + default_models=[], + default_tool_groups=default_tool_groups, + default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), + safety_config=SafetyConfig( + default_shield_id="llama-guard", + ), + ) + + postgres_run_settings = base_run_settings.model_copy( + update={ + "storage_backends": { + "kv_default": postgres_kv_config, + "sql_default": postgres_sql_config, + } + }, + deep=True, + ) + return DistributionTemplate( name=name, distro_type="self_hosted", @@ -253,71 +277,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: providers=providers, additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides=default_overrides, - default_models=[], - default_tool_groups=default_tool_groups, - default_shields=default_shields, - vector_stores_config=VectorStoresConfig( - default_provider_id="faiss", - default_embedding_model=QualifiedModel( - provider_id="sentence-transformers", - model_id="nomic-ai/nomic-embed-text-v1.5", - ), - ), - safety_config=SafetyConfig( - default_shield_id="llama-guard", - ), - ), - "run-with-postgres-store.yaml": RunConfigSettings( - provider_overrides={ - **default_overrides, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], - "batches": [ - Provider( - provider_id="reference", - provider_type="inline::reference", - config=dict( - kvstore=KVStoreReference( - backend="kv_postgres", - namespace="batches", - ).model_dump(exclude_none=True), - ), - ) - ], - }, - storage_backends={ - "kv_postgres": PostgresKVStoreConfig.sample_run_config(), - "sql_postgres": postgres_config, - }, - storage_stores={ - "metadata": KVStoreReference( - backend="kv_postgres", - namespace="registry", - ).model_dump(exclude_none=True), - "inference": 
InferenceStoreReference( - backend="sql_postgres", - table_name="inference_store", - ).model_dump(exclude_none=True), - "conversations": SqlStoreReference( - backend="sql_postgres", - table_name="openai_conversations", - ).model_dump(exclude_none=True), - "prompts": KVStoreReference( - backend="kv_postgres", - namespace="prompts", - ).model_dump(exclude_none=True), - }, - ), + "run.yaml": base_run_settings, + "run-with-postgres-store.yaml": postgres_run_settings, }, run_config_env_vars={ "LLAMA_STACK_PORT": ( diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 2bf947a8d..a3a28aec0 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -66,14 +66,6 @@ class InferenceStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: if not self._worker_tasks: return @@ -94,10 +86,29 @@ class InferenceStore: if self.enable_write_queue and self._queue is not None: await self._queue.join() + async def _ensure_workers_started(self) -> None: + """Ensure the async write queue workers run on the current loop.""" + if not self.enable_write_queue: + return + + if self._queue is None: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + logger.debug( + f"Inference store write queue created with max size {self._max_write_queue_size} " + f"and {self._num_writers} writers" + ) + + if not self._worker_tasks: + loop = asyncio.get_running_loop() + for _ in range(self._num_writers): + task = loop.create_task(self._worker_loop()) + self._worker_tasks.append(task) + async def store_chat_completion( self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam] ) -> None: if self.enable_write_queue: + await self._ensure_workers_started() if self._queue is None: raise ValueError("Inference store is not initialized") try: diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index 40466d00c..f5024a9ed 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -3,8 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import asyncio -from typing import Any from llama_stack.apis.agents import ( Order, @@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import ( ) from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from ..sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -55,28 +53,12 @@ class ResponsesStore: self.policy = policy self.sql_store = None - self.enable_write_queue = True - - # Async write queue and worker control - self._queue: ( - asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None - ) = None - self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = self.reference.max_write_queue_size - self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" base_store = sqlstore_impl(self.reference) self.sql_store = AuthorizedSqlStore(base_store, self.policy) - # Disable write queue for SQLite since WAL mode handles concurrency - # Keep it enabled for other backends (like Postgres) for performance - backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) - if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE: - self.enable_write_queue = False - logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)") - await self.sql_store.create_table( "openai_responses", { @@ -95,33 +77,12 @@ class ResponsesStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: - if not self._worker_tasks: - return - if self._queue is not None: - await self._queue.join() - for t in self._worker_tasks: - if not t.done(): - t.cancel() - for t in self._worker_tasks: - try: - await t - except asyncio.CancelledError: - pass - self._worker_tasks.clear() + return async def flush(self) -> None: - """Wait for all queued writes to complete. 
Useful for testing.""" - if self.enable_write_queue and self._queue is not None: - await self._queue.join() + """Maintained for compatibility; no-op now that writes are synchronous.""" + return async def store_response_object( self, @@ -129,31 +90,7 @@ class ResponsesStore: input: list[OpenAIResponseInput], messages: list[OpenAIMessageParam], ) -> None: - if self.enable_write_queue: - if self._queue is None: - raise ValueError("Responses store is not initialized") - try: - self._queue.put_nowait((response_object, input, messages)) - except asyncio.QueueFull: - logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") - await self._queue.put((response_object, input, messages)) - else: - await self._write_response_object(response_object, input, messages) - - async def _worker_loop(self) -> None: - assert self._queue is not None - while True: - try: - item = await self._queue.get() - except asyncio.CancelledError: - break - response_object, input, messages = item - try: - await self._write_response_object(response_object, input, messages) - except Exception as e: # noqa: BLE001 - logger.error(f"Error writing response object: {e}") - finally: - self._queue.task_done() + await self._write_response_object(response_object, input, messages) async def _write_response_object( self, diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index 3dfc82677..eb2d9a491 100644 --- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use enhanced["owner_principal"] = current_user.principal enhanced["access_attributes"] = current_user.attributes else: - enhanced["owner_principal"] = None - enhanced["access_attributes"] = None + # IMPORTANT: Use empty string and null value (not None) to match public access filter + # The public access filter in _get_public_access_conditions() expects: + # - owner_principal = '' (empty string) + # - access_attributes = null (JSON null, which serializes to the string 'null') + # Setting them to None (SQL NULL) will cause rows to be filtered out on read. + enhanced["owner_principal"] = "" + enhanced["access_attributes"] = None # Pydantic/JSON will serialize this as JSON null return enhanced @@ -188,8 +193,9 @@ class AuthorizedSqlStore: enhanced_data["owner_principal"] = current_user.principal enhanced_data["access_attributes"] = current_user.attributes else: - enhanced_data["owner_principal"] = None - enhanced_data["access_attributes"] = None + # IMPORTANT: Use empty string for owner_principal to match public access filter + enhanced_data["owner_principal"] = "" + enhanced_data["access_attributes"] = None # Will serialize as JSON null await self.sql_store.update(table, enhanced_data, where) @@ -245,14 +251,24 @@ class AuthorizedSqlStore: raise ValueError(f"Unsupported database type: {self.database_type}") def _get_public_access_conditions(self) -> list[str]: - """Get the SQL conditions for public access.""" - # Public records are records that have no owner_principal or access_attributes + """Get the SQL conditions for public access. 
+
+        Public records are those with:
+        - owner_principal = '' (empty string)
+        - access_attributes is either SQL NULL or JSON null
+
+        Note: Different databases serialize None differently:
+        - SQLite: None → JSON null (text = 'null')
+        - Postgres: None → SQL NULL (IS NULL)
+        """
         conditions = ["owner_principal = ''"]
         if self.database_type == StorageBackendType.SQL_POSTGRES.value:
-            # Postgres stores JSON null as 'null'
-            conditions.append("access_attributes::text = 'null'")
+            # Accept both SQL NULL and JSON null for Postgres compatibility
+            # This handles both old rows (SQL NULL) and new rows (JSON null)
+            conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
         elif self.database_type == StorageBackendType.SQL_SQLITE.value:
-            conditions.append("access_attributes = 'null'")
+            # SQLite serializes None as JSON null
+            conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
         else:
             raise ValueError(f"Unsupported database type: {self.database_type}")
         return conditions
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
index 858176dff..43678e5c7 100644
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@@ -1,6 +1,7 @@
 {
   "default": [
     {"suite": "base", "setup": "ollama"},
+    {"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"},
     {"suite": "vision", "setup": "ollama-vision"},
     {"suite": "responses", "setup": "gpt"},
     {"suite": "base-vllm-subset", "setup": "vllm"}
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 407564c15..817180cfe 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session):
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

     # Handle server:<config_name> format or server:<config_name>:<port>
+    # Also handles server:<distro_name>::<run_config_file> format
     if config.startswith("server:"):
-        parts = config.split(":")
-        config_name = parts[1]
-        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        # Strip the "server:" prefix first
+        config_part = config[7:]  # len("server:") == 7
+
+        # Check for :: (distro::runfile format)
+        if "::" in config_part:
+            config_name = config_part
+            port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        else:
+            # Single colon format: either <config_name> or <config_name>:<port>
+            parts = config_part.split(":")
+            config_name = parts[0]
+            port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+
         base_url = f"http://localhost:{port}"

         force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index 0cec66afe..7689657b4 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "ollama/nomic-embed-text:v1.5",
         },
     ),
+    "ollama-postgres": Setup(
+        name="ollama-postgres",
+        description="Server-mode tests with Postgres-backed persistence",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "SAFETY_MODEL": "ollama/llama-guard3:1b",
+            "POSTGRES_HOST": "127.0.0.1",
+            "POSTGRES_PORT": "5432",
+            "POSTGRES_DB": "llamastack",
+            "POSTGRES_USER": "llamastack",
+            "POSTGRES_PASSWORD": "llamastack",
+            "LLAMA_STACK_LOGGING": "openai_responses=info",
+        },
+        defaults={
+            "text_model": "ollama/llama3.2:3b-instruct-fp16",
+            "embedding_model":
"sentence-transformers/nomic-embed-text-v1.5", + "safety_model": "ollama/llama-guard3:1b", + "safety_shield": "llama-guard", + }, + ), "vllm": Setup( name="vllm", description="vLLM provider with a text model", From fcf649b97a8bb99f52097b558acfe2d0285f4ef3 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 12 Nov 2025 12:14:26 -0800 Subject: [PATCH 18/62] feat(storage): share sql/kv instances and add upsert support (#4140) A few changes to the storage layer to ensure we reduce unnecessary contention arising out of our design choices (and letting the database layer do its correct thing): - SQL stores now share a single `SqlAlchemySqlStoreImpl` per backend, and `kvstore_impl` caches instances per `(backend, namespace)`. This avoids spawning multiple SQLite connections for the same file, reducing lock contention and aligning the cache story for all backends. - Added an async upsert API (with SQLite/Postgres dialect inserts) and routed it through `AuthorizedSqlStore`, then switched conversations and responses to call it. Using native `ON CONFLICT DO UPDATE` eliminates the insert-then-update retry window that previously caused long WAL lock retries. ### Test Plan Existing tests, added a unit test for `upsert()` --- .../core/conversations/conversations.py | 15 ++--- .../providers/utils/kvstore/kvstore.py | 57 +++++++++++++------ .../utils/responses/responses_store.py | 19 ++----- .../providers/utils/sqlstore/api.py | 12 ++++ .../utils/sqlstore/authorized_sqlstore.py | 17 ++++++ .../utils/sqlstore/sqlalchemy_sqlstore.py | 39 ++++++++++++- .../providers/utils/sqlstore/sqlstore.py | 30 ++++++++-- tests/unit/utils/sqlstore/test_sqlstore.py | 34 ++++++++++- 8 files changed, 172 insertions(+), 51 deletions(-) diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 951de5e9d..f83834522 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations): "item_data": item_dict, } - # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update - try: - await self.sql_store.insert(table="conversation_items", data=item_record) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_items", - data={"created_at": created_at, "item_data": item_dict}, - where={"id": item_id}, - ) + await self.sql_store.upsert( + table="conversation_items", + data=item_record, + conflict_columns=["id"], + ) created_items.append(item_dict) diff --git a/src/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/providers/utils/kvstore/kvstore.py index eee51e5d9..5b8d77102 100644 --- a/src/llama_stack/providers/utils/kvstore/kvstore.py +++ b/src/llama_stack/providers/utils/kvstore/kvstore.py @@ -11,6 +11,9 @@ from __future__ import annotations +import asyncio +from collections import defaultdict + from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore @@ -53,45 +56,63 @@ class InmemoryKVStoreImpl(KVStore): _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} +_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {} +_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock) def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available KV store backends for reference 
resolution.""" global _KVSTORE_BACKENDS + global _KVSTORE_INSTANCES + global _KVSTORE_LOCKS _KVSTORE_BACKENDS.clear() + _KVSTORE_INSTANCES.clear() + _KVSTORE_LOCKS.clear() for name, cfg in backends.items(): _KVSTORE_BACKENDS[name] = cfg async def kvstore_impl(reference: KVStoreReference) -> KVStore: backend_name = reference.backend + cache_key = (backend_name, reference.namespace) + + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing backend_config = _KVSTORE_BACKENDS.get(backend_name) if backend_config is None: raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") - config = backend_config.model_copy() - config.namespace = reference.namespace + lock = _KVSTORE_LOCKS[cache_key] + async with lock: + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing - if config.type == StorageBackendType.KV_REDIS.value: - from .redis import RedisKVStoreImpl + config = backend_config.model_copy() + config.namespace = reference.namespace - impl = RedisKVStoreImpl(config) - elif config.type == StorageBackendType.KV_SQLITE.value: - from .sqlite import SqliteKVStoreImpl + if config.type == StorageBackendType.KV_REDIS.value: + from .redis import RedisKVStoreImpl - impl = SqliteKVStoreImpl(config) - elif config.type == StorageBackendType.KV_POSTGRES.value: - from .postgres import PostgresKVStoreImpl + impl = RedisKVStoreImpl(config) + elif config.type == StorageBackendType.KV_SQLITE.value: + from .sqlite import SqliteKVStoreImpl - impl = PostgresKVStoreImpl(config) - elif config.type == StorageBackendType.KV_MONGODB.value: - from .mongodb import MongoDBKVStoreImpl + impl = SqliteKVStoreImpl(config) + elif config.type == StorageBackendType.KV_POSTGRES.value: + from .postgres import PostgresKVStoreImpl - impl = MongoDBKVStoreImpl(config) - else: - raise ValueError(f"Unknown kvstore type {config.type}") + impl = PostgresKVStoreImpl(config) + elif config.type == StorageBackendType.KV_MONGODB.value: + from .mongodb import MongoDBKVStoreImpl - await impl.initialize() - return impl + impl = MongoDBKVStoreImpl(config) + else: + raise ValueError(f"Unknown kvstore type {config.type}") + + await impl.initialize() + _KVSTORE_INSTANCES[cache_key] = impl + return impl diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index f5024a9ed..fdca8ddee 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -252,19 +252,12 @@ class ResponsesStore: # Serialize messages to dict format for JSON storage messages_data = [msg.model_dump() for msg in messages] - # Upsert: try insert first, update if exists - try: - await self.sql_store.insert( - table="conversation_messages", - data={"conversation_id": conversation_id, "messages": messages_data}, - ) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_messages", - data={"messages": messages_data}, - where={"conversation_id": conversation_id}, - ) + await self.sql_store.upsert( + table="conversation_messages", + data={"conversation_id": conversation_id, "messages": messages_data}, + conflict_columns=["conversation_id"], + update_columns=["messages"], + ) logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}") diff --git a/src/llama_stack/providers/utils/sqlstore/api.py 
b/src/llama_stack/providers/utils/sqlstore/api.py index a61fd1090..bcd224234 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -47,6 +47,18 @@ class SqlStore(Protocol): """ pass + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """ + Insert a row and update specified columns when conflicts occur. + """ + pass + async def fetch_all( self, table: str, diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index eb2d9a491..ba95dd120 100644 --- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -129,6 +129,23 @@ class AuthorizedSqlStore: enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data] await self.sql_store.insert(table, enhanced_data) + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """Upsert a row with automatic access control attribute capture.""" + current_user = get_authenticated_user() + enhanced_data = _enhance_item_with_access_control(data, current_user) + await self.sql_store.upsert( + table=table, + data=enhanced_data, + conflict_columns=conflict_columns, + update_columns=update_columns, + ) + async def fetch_all( self, table: str, diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 356f49ed1..cfc3131f4 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -72,13 +72,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement: class SqlAlchemySqlStoreImpl(SqlStore): def __init__(self, config: SqlAlchemySqlStoreConfig): self.config = config + self._is_sqlite_backend = "sqlite" in self.config.engine_str self.async_session = async_sessionmaker(self.create_engine()) self.metadata = MetaData() def create_engine(self) -> AsyncEngine: # Configure connection args for better concurrency support connect_args = {} - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: # SQLite-specific optimizations for concurrent access # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases connect_args["timeout"] = 5.0 @@ -91,7 +92,7 @@ class SqlAlchemySqlStoreImpl(SqlStore): ) # Enable WAL mode for SQLite to support concurrent readers and writers - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: @event.listens_for(engine.sync_engine, "connect") def set_sqlite_pragma(dbapi_conn, connection_record): @@ -151,6 +152,29 @@ class SqlAlchemySqlStoreImpl(SqlStore): await session.execute(self.metadata.tables[table].insert(), data) await session.commit() + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + table_obj = self.metadata.tables[table] + dialect_insert = self._get_dialect_insert(table_obj) + insert_stmt = dialect_insert.values(**data) + + if update_columns is None: + update_columns = [col for col in data.keys() if col not in conflict_columns] + + update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns} + 
conflict_cols = [table_obj.c[col] for col in conflict_columns]
+
+        stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping)
+
+        async with self.async_session() as session:
+            await session.execute(stmt)
+            await session.commit()
+
     async def fetch_all(
         self,
         table: str,
@@ -333,9 +357,19 @@ class SqlAlchemySqlStoreImpl(SqlStore):
                         add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}")
                         await conn.execute(add_column_sql)
 
         except Exception as e:
             # If any error occurs during migration, log it but don't fail
             # The table creation will handle adding the column
             logger.error(f"Error adding column {column_name} to table {table}: {e}")
             pass
+
+    def _get_dialect_insert(self, table: Table):
+        if self._is_sqlite_backend:
+            from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+
+            return sqlite_insert(table)
+        else:
+            from sqlalchemy.dialects.postgresql import insert as pg_insert
+
+            return pg_insert(table)
diff --git a/src/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlstore.py
index 31801c4ca..9409b7d00 100644
--- a/src/llama_stack/providers/utils/sqlstore/sqlstore.py
+++ b/src/llama_stack/providers/utils/sqlstore/sqlstore.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from threading import Lock
 from typing import Annotated, cast
 
 from pydantic import Field
@@ -21,6 +22,8 @@ from .api import SqlStore
 sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
 
 _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
+_SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
+_SQLSTORE_LOCKS: dict[str, Lock] = {}
 
 
 SqlStoreConfig = Annotated[
@@ -52,19 +55,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
             f"Unknown SQL store backend '{backend_name}'.
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" ) - if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): - from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing - config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() - return SqlAlchemySqlStoreImpl(config) - else: - raise ValueError(f"Unknown sqlstore type {backend_config.type}") + lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock()) + with lock: + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): + from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + instance = SqlAlchemySqlStoreImpl(config) + _SQLSTORE_INSTANCES[backend_name] = instance + return instance + else: + raise ValueError(f"Unknown sqlstore type {backend_config.type}") def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available SQL store backends for reference resolution.""" global _SQLSTORE_BACKENDS + global _SQLSTORE_INSTANCES _SQLSTORE_BACKENDS.clear() + _SQLSTORE_INSTANCES.clear() + _SQLSTORE_LOCKS.clear() for name, cfg in backends.items(): _SQLSTORE_BACKENDS[name] = cfg diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py index 00669b698..d7ba0dc89 100644 --- a/tests/unit/utils/sqlstore/test_sqlstore.py +++ b/tests/unit/utils/sqlstore/test_sqlstore.py @@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory import pytest -from llama_stack.providers.utils.sqlstore.api import ColumnType +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig @@ -65,6 +65,38 @@ async def test_sqlite_sqlstore(): assert result.has_more is False +async def test_sqlstore_upsert_support(): + with TemporaryDirectory() as tmp_dir: + db_path = tmp_dir + "/upsert.db" + store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path)) + + await store.create_table( + "items", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "value": ColumnType.STRING, + "updated_at": ColumnType.INTEGER, + }, + ) + + await store.upsert( + table="items", + data={"id": "item_1", "value": "first", "updated_at": 1}, + conflict_columns=["id"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "first", "updated_at": 1} + + await store.upsert( + table="items", + data={"id": "item_1", "value": "second", "updated_at": 2}, + conflict_columns=["id"], + update_columns=["value", "updated_at"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "second", "updated_at": 2} + + async def test_sqlstore_pagination_basic(): """Test basic pagination functionality at the SQL store level.""" with TemporaryDirectory() as tmp_dir: From 9eb81439d2bc572e94d3d604e83f01e650da6298 Mon Sep 17 00:00:00 2001 From: Akram Ben Aissi Date: Thu, 13 Nov 2025 14:50:06 +0100 Subject: [PATCH 19/62] docs: Add comprehensive Files API and Vector Store integration doc (#3279) docs: Add comprehensive Files API and Vector Store integration documentation - Add Files API documentation 
with OpenAI-compatible endpoints - Create comprehensive guide for OpenAI-compatible file operations - Reorganize documentation structure: move file operations to files/ directory - Add vector store provider documentation for Milvus, SQLite-vec, FAISS - Clean up redundant files and improve navigation - Update cross-references and eliminate documentation duplication - Support for release 0.2.14 FileResponse and Vector Store API features # What does this PR do? ## Test Plan --- docs/docs/api-deprecated/index.mdx | 62 +++ docs/docs/api-experimental/index.mdx | 128 ++++++ docs/docs/api-openai/index.mdx | 287 ++++++++++++ docs/docs/api/index.mdx | 144 ++++++ docs/docs/concepts/apis/index.mdx | 19 +- .../file_operations_vector_stores.mdx | 420 ++++++++++++++++++ docs/docs/providers/files/files.mdx | 290 ++++++++++++ .../openai_file_operations_quick_reference.md | 80 ++++ .../files/openai_file_operations_support.md | 291 ++++++++++++ docs/docs/providers/index.mdx | 15 +- docs/docs/providers/openai.mdx | 19 +- 11 files changed, 1747 insertions(+), 8 deletions(-) create mode 100644 docs/docs/api-deprecated/index.mdx create mode 100644 docs/docs/api-experimental/index.mdx create mode 100644 docs/docs/api-openai/index.mdx create mode 100644 docs/docs/api/index.mdx create mode 100644 docs/docs/concepts/file_operations_vector_stores.mdx create mode 100644 docs/docs/providers/files/files.mdx create mode 100644 docs/docs/providers/files/openai_file_operations_quick_reference.md create mode 100644 docs/docs/providers/files/openai_file_operations_support.md diff --git a/docs/docs/api-deprecated/index.mdx b/docs/docs/api-deprecated/index.mdx new file mode 100644 index 000000000..0da357e30 --- /dev/null +++ b/docs/docs/api-deprecated/index.mdx @@ -0,0 +1,62 @@ +--- +title: Deprecated APIs +description: Legacy APIs that are being phased out +sidebar_label: Deprecated +sidebar_position: 1 +--- + +# Deprecated APIs + +This section contains APIs that are being phased out in favor of newer, more standardized implementations. These APIs are maintained for backward compatibility but are not recommended for new projects. + +:::warning Deprecation Notice +These APIs are deprecated and will be removed in future versions. Please migrate to the recommended alternatives listed below. +::: + +## Migration Guide + +When using deprecated APIs, please refer to the migration guides provided for each API to understand how to transition to the supported alternatives. + +## Deprecated API List + +### Legacy Inference APIs +Some older inference endpoints that have been superseded by the standardized Inference API. + +**Migration Path:** Use the [Inference API](../api/) instead. + +### Legacy Vector Operations +Older vector database operations that have been replaced by the Vector IO API. + +**Migration Path:** Use the [Vector IO API](../api/) instead. + +### Legacy File Operations +Older file management endpoints that have been replaced by the Files API. + +**Migration Path:** Use the [Files API](../api/) instead. + +## Support Timeline + +Deprecated APIs will be supported according to the following timeline: + +- **Current Version**: Full support with deprecation warnings +- **Next Major Version**: Limited support with migration notices +- **Following Major Version**: Removal of deprecated APIs + +## Getting Help + +If you need assistance migrating from deprecated APIs: + +1. Check the specific migration guides for each API +2. Review the [API Reference](../api/) for current alternatives +3. 
Consult the [Community Forums](https://github.com/llamastack/llama-stack/discussions) for migration support +4. Open an issue on GitHub for specific migration questions + +## Contributing + +If you find issues with deprecated APIs or have suggestions for improving the migration process, please contribute by: + +1. Opening an issue describing the problem +2. Submitting a pull request with improvements +3. Updating migration documentation + +For more information on contributing, see our [Contributing Guide](../contributing/). diff --git a/docs/docs/api-experimental/index.mdx b/docs/docs/api-experimental/index.mdx new file mode 100644 index 000000000..adbd64582 --- /dev/null +++ b/docs/docs/api-experimental/index.mdx @@ -0,0 +1,128 @@ +--- +title: Experimental APIs +description: APIs in development with limited support +sidebar_label: Experimental +sidebar_position: 1 +--- + +# Experimental APIs + +This section contains APIs that are currently in development and may have limited support or stability. These APIs are available for testing and feedback but should not be used in production environments. + +:::warning Experimental Notice +These APIs are experimental and may change without notice. Use with caution and provide feedback to help improve them. +::: + +## Current Experimental APIs + +### Batch Inference API +Run inference on a dataset of inputs in batch mode for improved efficiency. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale inference operations + +**Features:** +- Batch processing of multiple inputs +- Optimized resource utilization +- Progress tracking and monitoring + +### Batch Agents API +Run agentic workflows on a dataset of inputs in batch mode. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale agent operations + +**Features:** +- Batch agent execution +- Parallel processing capabilities +- Result aggregation and analysis + +### Synthetic Data Generation API +Generate synthetic data for model development and testing. + +**Status:** Early Development +**Provider Support:** Very Limited +**Use Case:** Training data augmentation + +**Features:** +- Automated data generation +- Quality control mechanisms +- Customizable generation parameters + +### Batches API (OpenAI-compatible) +OpenAI-compatible batch management for inference operations. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** OpenAI batch processing compatibility + +**Features:** +- OpenAI batch API compatibility +- Job scheduling and management +- Status tracking and monitoring + +## Getting Started with Experimental APIs + +### Prerequisites +- Llama Stack server running with experimental features enabled +- Appropriate provider configurations +- Understanding of API limitations + +### Configuration +Experimental APIs may require special configuration flags or provider settings. Check the specific API documentation for setup requirements. + +### Usage Guidelines +1. **Testing Only**: Use experimental APIs for testing and development only +2. **Monitor Changes**: Watch for updates and breaking changes +3. **Provide Feedback**: Report issues and suggest improvements +4. 
**Backup Data**: Always backup important data when using experimental features + +## Feedback and Contribution + +We encourage feedback on experimental APIs to help improve them: + +### Reporting Issues +- Use GitHub issues with the "experimental" label +- Include detailed error messages and reproduction steps +- Specify the API version and provider being used + +### Feature Requests +- Submit feature requests through GitHub discussions +- Provide use cases and expected behavior +- Consider contributing implementations + +### Testing +- Test experimental APIs in your environment +- Report performance issues and optimization opportunities +- Share success stories and use cases + +## Migration to Stable APIs + +As experimental APIs mature, they will be moved to the stable API section. When this happens: + +1. **Announcement**: We'll announce the promotion in release notes +2. **Migration Guide**: Detailed migration instructions will be provided +3. **Deprecation Timeline**: Experimental versions will be deprecated with notice +4. **Support**: Full support will be available for stable versions + +## Provider Support + +Experimental APIs may have limited provider support. Check the specific API documentation for: + +- Supported providers +- Configuration requirements +- Known limitations +- Performance characteristics + +## Roadmap + +Experimental APIs are part of our ongoing development roadmap: + +- **Q1 2024**: Batch Inference API stabilization +- **Q2 2024**: Batch Agents API improvements +- **Q3 2024**: Synthetic Data Generation API expansion +- **Q4 2024**: Batches API full OpenAI compatibility + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api-openai/index.mdx b/docs/docs/api-openai/index.mdx new file mode 100644 index 000000000..99f3edaa7 --- /dev/null +++ b/docs/docs/api-openai/index.mdx @@ -0,0 +1,287 @@ +--- +title: OpenAI API Compatibility +description: OpenAI-compatible APIs and features in Llama Stack +sidebar_label: OpenAI Compatibility +sidebar_position: 1 +--- + +# OpenAI API Compatibility + +Llama Stack provides comprehensive OpenAI API compatibility, allowing you to use existing OpenAI API clients and tools with Llama Stack providers. This compatibility layer ensures seamless migration and interoperability. + +## Overview + +OpenAI API compatibility in Llama Stack includes: + +- **OpenAI-compatible endpoints** for all major APIs +- **Request/response format compatibility** with OpenAI standards +- **Authentication and authorization** using OpenAI-style API keys +- **Error handling** with OpenAI-compatible error codes and messages +- **Rate limiting** and usage tracking compatible with OpenAI patterns + +## Supported OpenAI APIs + +### Chat Completions API +OpenAI-compatible chat completions for conversational AI applications. + +**Endpoint:** `/v1/chat/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Message-based conversations +- System prompts and user messages +- Function calling support +- Streaming responses +- Temperature and other parameter controls + +### Completions API +OpenAI-compatible text completions for general text generation. 
+ +**Endpoint:** `/v1/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Text completion generation +- Prompt engineering support +- Customizable parameters +- Batch processing capabilities + +### Embeddings API +OpenAI-compatible embeddings for vector operations. + +**Endpoint:** `/v1/embeddings` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All embedding providers + +**Features:** +- Text embedding generation +- Multiple embedding models +- Batch embedding processing +- Vector similarity operations + +### Files API +OpenAI-compatible file management for document processing. + +**Endpoint:** `/v1/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** Local Filesystem, S3 + +**Features:** +- File upload and management +- Document processing +- File metadata tracking +- Secure file access + +### Vector Store Files API +OpenAI-compatible vector store file operations for RAG applications. + +**Endpoint:** `/v1/vector_stores/{vector_store_id}/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** FAISS, SQLite-vec, Milvus, ChromaDB, Qdrant, Weaviate, Postgres (PGVector) + +**Features:** +- Automatic document processing +- Vector store integration +- File chunking and indexing +- Search and retrieval operations + +### Batches API +OpenAI-compatible batch processing for large-scale operations. + +**Endpoint:** `/v1/batches` +**Compatibility:** OpenAI API compatibility (experimental) +**Providers:** Limited support + +**Features:** +- Batch job creation and management +- Progress tracking +- Result retrieval +- Error handling + +## Migration from OpenAI + +### Step 1: Update API Endpoint +Change your API endpoint from OpenAI to your Llama Stack server: + +```python +# Before (OpenAI) +import openai +client = openai.OpenAI(api_key="your-openai-key") + +# After (Llama Stack) +import openai +client = openai.OpenAI( + api_key="your-llama-stack-key", + base_url="http://localhost:8000/v1" # Your Llama Stack server +) +``` + +### Step 2: Configure Providers +Set up your preferred providers in the Llama Stack configuration: + +```yaml +# stack-config.yaml +inference: + providers: + - name: "meta-reference" + type: "inline" + model: "llama-3.1-8b" +``` + +### Step 3: Test Compatibility +Verify that your existing code works with Llama Stack: + +```python +# Test chat completions +response = client.chat.completions.create( + model="llama-3.1-8b", + messages=[ + {"role": "user", "content": "Hello, world!"} + ] +) +print(response.choices[0].message.content) +``` + +## Provider-Specific Features + +### Meta Reference Provider +- Full OpenAI API compatibility +- Local model execution +- Custom model support + +### Remote Providers +- OpenAI API compatibility +- Cloud-based execution +- Scalable infrastructure + +### Vector Store Providers +- OpenAI vector store API compatibility +- Automatic document processing +- Advanced search capabilities + +## Authentication + +Llama Stack supports OpenAI-style authentication: + +### API Key Authentication +```python +client = openai.OpenAI( + api_key="your-api-key", + base_url="http://localhost:8000/v1" +) +``` + +### Environment Variables +```bash +export OPENAI_API_KEY="your-api-key" +export OPENAI_BASE_URL="http://localhost:8000/v1" +``` + +## Error Handling + +Llama Stack provides OpenAI-compatible error responses: + +```python +try: + response = client.chat.completions.create(...) 
+except openai.APIError as e: + print(f"API Error: {e}") +except openai.RateLimitError as e: + print(f"Rate Limit Error: {e}") +except openai.APIConnectionError as e: + print(f"Connection Error: {e}") +``` + +## Rate Limiting + +OpenAI-compatible rate limiting is supported: + +- **Requests per minute** limits +- **Tokens per minute** limits +- **Concurrent request** limits +- **Usage tracking** and monitoring + +## Monitoring and Observability + +Track your API usage with OpenAI-compatible monitoring: + +- **Request/response logging** +- **Usage metrics** and analytics +- **Performance monitoring** +- **Error tracking** and alerting + +## Best Practices + +### 1. Provider Selection +Choose providers based on your requirements: +- **Local development**: Meta Reference, Ollama +- **Production**: Cloud providers (Fireworks, Together, NVIDIA) +- **Specialized use cases**: Custom providers + +### 2. Model Configuration +Configure models for optimal performance: +- **Model selection** based on task requirements +- **Parameter tuning** for specific use cases +- **Resource allocation** for performance + +### 3. Error Handling +Implement robust error handling: +- **Retry logic** for transient failures +- **Fallback providers** for high availability +- **Monitoring** and alerting for issues + +### 4. Security +Follow security best practices: +- **API key management** and rotation +- **Access control** and authorization +- **Data privacy** and compliance + +## Implementation Examples + +For detailed code examples and implementation guides, see our [OpenAI Implementation Guide](../providers/openai.mdx). + +## Known Limitations + +### Responses API Limitations +The Responses API is still in active development. For detailed information about current limitations and implementation status, see our [OpenAI Responses API Limitations](../providers/openai_responses_limitations.mdx). + +## Troubleshooting + +### Common Issues + +**Connection Errors** +- Verify server is running +- Check network connectivity +- Validate API endpoint URL + +**Authentication Errors** +- Verify API key is correct +- Check key permissions +- Ensure proper authentication headers + +**Model Errors** +- Verify model is available +- Check provider configuration +- Validate model parameters + +### Getting Help + +For OpenAI compatibility issues: + +1. **Check Documentation**: Review provider-specific documentation +2. **Community Support**: Ask questions in GitHub discussions +3. **Issue Reporting**: Open GitHub issues for bugs +4. **Professional Support**: Contact support for enterprise issues + +## Roadmap + +Upcoming OpenAI compatibility features: + +- **Enhanced batch processing** support +- **Advanced function calling** capabilities +- **Improved error handling** and diagnostics +- **Performance optimizations** for large-scale deployments + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api/index.mdx b/docs/docs/api/index.mdx new file mode 100644 index 000000000..7088c6c2b --- /dev/null +++ b/docs/docs/api/index.mdx @@ -0,0 +1,144 @@ +--- +title: API Reference +description: Complete reference for Llama Stack APIs +sidebar_label: Overview +sidebar_position: 1 +--- + +# API Reference + +Llama Stack provides a comprehensive set of APIs for building generative AI applications. 
All APIs follow OpenAI-compatible standards and can be used interchangeably across different providers. + +## Core APIs + +### Inference API +Run inference with Large Language Models (LLMs) and embedding models. + +**Supported Providers:** +- Meta Reference (Single Node) +- Ollama (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- NVIDIA NIM (Hosted and Single Node) +- vLLM (Hosted and Single Node) +- TGI (Hosted and Single Node) +- AWS Bedrock (Hosted) +- Cerebras (Hosted) +- Groq (Hosted) +- SambaNova (Hosted) +- PyTorch ExecuTorch (On-device iOS, Android) +- OpenAI (Hosted) +- Anthropic (Hosted) +- Gemini (Hosted) +- WatsonX (Hosted) + +### Agents API +Run multi-step agentic workflows with LLMs, including tool usage, memory (RAG), and complex reasoning. + +**Supported Providers:** +- Meta Reference (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- PyTorch ExecuTorch (On-device iOS) + +### Vector IO API +Perform operations on vector stores, including adding documents, searching, and deleting documents. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-Vec (Single Node) +- Chroma (Hosted and Single Node) +- Milvus (Hosted and Single Node) +- Postgres (PGVector) (Hosted and Single Node) +- Weaviate (Hosted) +- Qdrant (Hosted and Single Node) + +### Files API (OpenAI-compatible) +Manage file uploads, storage, and retrieval with OpenAI-compatible endpoints. + +**Supported Providers:** +- Local Filesystem (Single Node) +- S3 (Hosted) + +### Vector Store Files API (OpenAI-compatible) +Integrate file operations with vector stores for automatic document processing and search. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-vec (Single Node) +- Milvus (Single Node) +- ChromaDB (Hosted and Single Node) +- Qdrant (Hosted and Single Node) +- Weaviate (Hosted) +- Postgres (PGVector) (Hosted and Single Node) + +### Safety API +Apply safety policies to outputs at a systems level, not just model level. + +**Supported Providers:** +- Llama Guard (Depends on Inference Provider) +- Prompt Guard (Single Node) +- Code Scanner (Single Node) +- AWS Bedrock (Hosted) + +### Post Training API +Fine-tune models for specific use cases and domains. + +**Supported Providers:** +- Meta Reference (Single Node) +- HuggingFace (Single Node) +- TorchTune (Single Node) +- NVIDIA NEMO (Hosted) + +### Eval API +Generate outputs and perform scoring to evaluate system performance. + +**Supported Providers:** +- Meta Reference (Single Node) +- NVIDIA NEMO (Hosted) + +### Telemetry API +Collect telemetry data from the system for monitoring and observability. + +**Supported Providers:** +- Meta Reference (Single Node) + +### Tool Runtime API +Interact with various tools and protocols to extend LLM capabilities. + +**Supported Providers:** +- Brave Search (Hosted) +- RAG Runtime (Single Node) + +## API Compatibility + +All Llama Stack APIs are designed to be OpenAI-compatible, allowing you to: +- Use existing OpenAI API clients and tools +- Migrate from OpenAI to other providers seamlessly +- Maintain consistent API contracts across different environments + +## Getting Started + +To get started with Llama Stack APIs: + +1. **Choose a Distribution**: Select a pre-configured distribution that matches your environment +2. **Configure Providers**: Set up the providers you want to use for each API +3. **Start the Server**: Launch the Llama Stack server with your configuration +4. 
**Use the APIs**: Make requests to the API endpoints using your preferred client
+
+For detailed setup instructions, see our [Getting Started Guide](../getting_started/quickstart).
+
+## Provider Details
+
+For complete provider compatibility and setup instructions, see our [Providers Documentation](../providers/).
+
+## API Stability
+
+Llama Stack APIs are organized by stability level:
+- **[Stable APIs](./index.mdx)** - Production-ready APIs with full support
+- **[Experimental APIs](../api-experimental/)** - APIs in development with limited support
+- **[Deprecated APIs](../api-deprecated/)** - Legacy APIs being phased out
+
+## OpenAI Integration
+
+For specific OpenAI API compatibility features, see our [OpenAI Compatibility Guide](../api-openai/).
diff --git a/docs/docs/concepts/apis/index.mdx b/docs/docs/concepts/apis/index.mdx
index 7519f6eff..7d12478ed 100644
--- a/docs/docs/concepts/apis/index.mdx
+++ b/docs/docs/concepts/apis/index.mdx
@@ -7,7 +7,7 @@ sidebar_position: 1
 
 # APIs
 
-A Llama Stack API is described as a collection of REST endpoints. We currently support the following APIs:
+A Llama Stack API is described as a collection of REST endpoints following OpenAI API standards. We currently support the following APIs:
 
 - **Inference**: run inference with a LLM
 - **Safety**: apply safety policies to the output at a Systems (not only model) level
@@ -16,11 +16,26 @@ A Llama Stack API is described as a collection of REST endpoints. We currently s
 - **Scoring**: evaluate outputs of the system
 - **Eval**: generate outputs (via Inference or Agents) and perform scoring
 - **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
+- **Files**: manage file uploads, storage, and retrieval
+- **Telemetry**: collect telemetry data from the system
 - **Post Training**: fine-tune a model
 - **Tool Runtime**: interact with various tools and protocols
-- **Responses**: generate responses from an LLM using this OpenAI compatible API.
+- **Responses**: generate responses from an LLM
 
 We are working on adding a few more APIs to complete the application lifecycle. These will include:
 - **Batch Inference**: run inference on a dataset of inputs
 - **Batch Agents**: run agents on a dataset of inputs
 - **Batches**: OpenAI-compatible batch management for inference
+
+
+## OpenAI API Compatibility
+We are working on adding OpenAI API compatibility to Llama Stack. This will allow you to use Llama Stack with OpenAI API clients and tools.
+
+### File Operations and Vector Store Integration
+
+The Files API and Vector Store APIs work together through file operations, enabling automatic document processing and search. This integration implements the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files) and allows you to:
+- Upload documents through the Files API
+- Automatically process and chunk documents into searchable vectors
+- Store processed content in vector databases based on the availability of [our providers](../../providers/index.mdx)
+- Search through documents using natural language queries
+
+For detailed information about this integration, see [File Operations and Vector Store Integration](../file_operations_vector_stores.mdx).
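+
+As a quick illustration, here is a minimal sketch of that flow, following the client conventions used elsewhere in these docs (file name and query are illustrative):
+
+```python
+# Create a vector store, upload a file, attach it, and search it.
+vector_store = client.vector_stores.create(name="docs")
+
+with open("handbook.pdf", "rb") as f:
+    file_info = await client.files.upload(file=f, purpose="assistants")
+
+# Attaching the file triggers automatic chunking and embedding.
+await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="How do I deploy to Kubernetes?",
+    max_num_results=5,
+)
+```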
diff --git a/docs/docs/concepts/file_operations_vector_stores.mdx b/docs/docs/concepts/file_operations_vector_stores.mdx new file mode 100644 index 000000000..6168ecf9d --- /dev/null +++ b/docs/docs/concepts/file_operations_vector_stores.mdx @@ -0,0 +1,420 @@ +# File Operations and Vector Store Integration + +## Overview + +Llama Stack provides seamless integration between the Files API and Vector Store APIs, enabling you to upload documents and automatically process them into searchable vector embeddings. This integration implements file operations following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files). + +## Enhanced Capabilities Beyond OpenAI + +While Llama Stack maintains full compatibility with OpenAI's Vector Store API, it provides several additional capabilities that enhance functionality and flexibility: + +### **Embedding Model Specification** +Unlike OpenAI's vector stores which use a fixed embedding model, Llama Stack allows you to specify which embedding model to use when creating a vector store: + +```python +# Create vector store with specific embedding model +vector_store = client.vector_stores.create( + name="my_documents", + embedding_model="all-MiniLM-L6-v2", # Specify your preferred model + embedding_dimension=384, +) +``` + +### **Advanced Search Modes** +Llama Stack supports multiple search modes beyond basic vector similarity: + +- **Vector Search**: Pure semantic similarity search using embeddings +- **Keyword Search**: Traditional keyword-based search for exact matches +- **Hybrid Search**: Combines both vector and keyword search for optimal results + +```python +# Different search modes +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="machine learning algorithms", + search_mode="hybrid", # or "vector", "keyword" + max_num_results=5, +) +``` + +### **Flexible Ranking Options** +For hybrid search, Llama Stack offers configurable ranking strategies: + +- **RRF (Reciprocal Rank Fusion)**: Combines rankings with configurable impact factor +- **Weighted Ranker**: Linear combination of vector and keyword scores with adjustable weights + +```python +# Custom ranking configuration +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks", + search_mode="hybrid", + ranking_options={ + "ranker": {"type": "weighted", "alpha": 0.7} # 70% vector, 30% keyword + }, +) +``` + +### **Provider Selection** +Choose from multiple vector store providers based on your specific needs: + +- **Inline Providers**: FAISS (fast in-memory), SQLite-vec (disk-based), Milvus (high-performance) +- **Remote Providers**: ChromaDB, Qdrant, Weaviate, Postgres (PGVector), Milvus + +```python +# Specify provider when creating vector store +vector_store = client.vector_stores.create( + name="my_documents", provider_id="sqlite-vec" # Choose your preferred provider +) +``` + +## How It Works + +The file operations work through several key components: + +1. **File Upload**: Documents are uploaded through the Files API +2. **Automatic Processing**: Files are automatically chunked and converted to embeddings +3. **Vector Storage**: Chunks are stored in vector databases with metadata +4. 
**Search & Retrieval**: Users can search through processed documents using natural language
+
+## Supported Vector Store Providers
+
+The following vector store providers support file operations:
+
+### Inline Providers (Single Node)
+
+- **FAISS**: Fast in-memory vector similarity search
+- **SQLite-vec**: Disk-based storage with hybrid search capabilities
+
+### Remote Providers (Hosted)
+
+- **ChromaDB**: Vector database with metadata filtering
+- **Weaviate**: Vector database with GraphQL interface
+- **Postgres (PGVector)**: Vector extensions for PostgreSQL
+
+### Both Inline & Remote Providers
+- **Milvus**: High-performance vector database with advanced indexing
+- **Qdrant**: Vector similarity search with payload filtering
+
+## File Processing Pipeline
+
+### 1. File Upload
+
+```python
+from llama_stack import LlamaStackClient
+
+client = LlamaStackClient("http://localhost:8000")
+
+# Upload a document
+with open("document.pdf", "rb") as f:
+    file_info = await client.files.upload(file=f, purpose="assistants")
+```
+
+### 2. Attach to Vector Store
+
+```python
+# Create a vector store
+vector_store = client.vector_stores.create(name="my_documents")
+
+# Attach the file to the vector store
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 3. Automatic Processing
+
+The system automatically:
+- Detects the file type and extracts text content
+- Splits content into chunks (default: 800 tokens with 400 token overlap)
+- Generates embeddings for each chunk
+- Stores chunks with metadata in the vector store
+- Updates file status to "completed"
+
+### 4. Search and Retrieval
+
+```python
+# Search through processed documents
+search_results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is the main topic discussed?",
+    max_num_results=5,
+)
+
+# Process results
+for result in search_results.data:
+    print(f"Score: {result.score}")
+    for content in result.content:
+        print(f"Content: {content.text}")
+```
+
+## Supported File Types
+
+The file processing system supports various document formats:
+
+- **Text Files**: `.txt`, `.md`, `.rst`
+- **Documents**: `.pdf`, `.docx`, `.doc`
+- **Code**: `.py`, `.js`, `.java`, `.cpp`, etc.
+- **Data**: `.json`, `.csv`, `.xml`
+- **Web Content**: HTML files
+
+## Chunking Strategies
+
+### Default Strategy
+
+The default chunking strategy uses:
+- **Max Chunk Size**: 800 tokens
+- **Overlap**: 400 tokens
+- **Method**: Semantic boundary detection
+
+### Custom Chunking
+
+You can customize chunking when attaching files by passing a static chunking strategy (the structure follows the OpenAI Vector Store Files API):
+
+```python
+# Define a static chunking strategy (token counts are illustrative)
+chunking_strategy = {
+    "type": "static",
+    "static": {
+        "max_chunk_size_tokens": 600,
+        "chunk_overlap_tokens": 200,
+    },
+}
+
+# Attach file with custom chunking
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id,
+    file_id=file_info.id,
+    chunking_strategy=chunking_strategy,
+)
+```
+
+**Note**: While Llama Stack is OpenAI-compatible, it also supports additional options beyond the standard OpenAI API. When creating vector stores, you can specify custom embedding models and embedding dimensions that will be used when processing chunks from attached files.
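+
+For example, a minimal sketch combining both options (the model name, dimension, and token counts are illustrative):
+
+```python
+# The embedding model chosen at creation time is applied to every
+# chunk produced from files attached to this store later.
+vector_store = client.vector_stores.create(
+    name="research_notes",
+    embedding_model="all-MiniLM-L6-v2",
+    embedding_dimension=384,
+)
+
+await client.vector_stores.files.create(
+    vector_store_id=vector_store.id,
+    file_id=file_info.id,
+    chunking_strategy={
+        "type": "static",
+        "static": {"max_chunk_size_tokens": 600, "chunk_overlap_tokens": 200},
+    },
+)
+```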
+ + +## File Management + +### List Files in Vector Store + +```python +# List all files in a vector store +files = await client.vector_stores.files.list(vector_store_id=vector_store.id) + +for file in files: + print(f"File: {file.filename}, Status: {file.status}") +``` + +### File Status Tracking + +Files go through several statuses: +- **in_progress**: File is being processed +- **completed**: File successfully processed and searchable +- **failed**: Processing failed (check `last_error` for details) +- **cancelled**: Processing was cancelled + +### Retrieve File Content + +```python +# Get chunked content from vector store +content_response = await client.vector_stores.files.retrieve_content( + vector_store_id=vector_store.id, file_id=file_info.id +) + +for chunk in content_response.content: + print(f"Chunk {chunk.metadata.get('chunk_index', 0)}: {chunk.text}") +``` + +## Vector Store Management + +### List Vector Stores + +Retrieve a paginated list of all vector stores: + +```python +# List all vector stores with default pagination +vector_stores = await client.vector_stores.list() + +# Custom pagination and ordering +vector_stores = await client.vector_stores.list( + limit=10, + order="asc", # or "desc" + after="vs_12345678", # cursor-based pagination +) + +for store in vector_stores.data: + print(f"Store: {store.name}, Files: {store.file_counts.total}") + print(f"Created: {store.created_at}, Status: {store.status}") +``` + +### Retrieve Vector Store Details + +Get detailed information about a specific vector store: + +```python +# Get vector store details +store_details = await client.vector_stores.retrieve(vector_store_id="vs_12345678") + +print(f"Name: {store_details.name}") +print(f"Status: {store_details.status}") +print(f"File Counts: {store_details.file_counts}") +print(f"Usage: {store_details.usage_bytes} bytes") +print(f"Created: {store_details.created_at}") +print(f"Metadata: {store_details.metadata}") +``` + +### Update Vector Store + +Modify vector store properties such as name, metadata, or expiration settings: + +```python +# Update vector store name and metadata +updated_store = await client.vector_stores.update( + vector_store_id="vs_12345678", + name="Updated Document Collection", + metadata={ + "description": "Updated collection for research", + "category": "research", + "version": "2.0", + }, +) + +# Set expiration policy +expired_store = await client.vector_stores.update( + vector_store_id="vs_12345678", + expires_after={"anchor": "last_active_at", "days": 30}, +) + +print(f"Updated store: {updated_store.name}") +print(f"Last active: {updated_store.last_active_at}") +``` + +### Delete Vector Store + +Remove a vector store and all its associated data: + +```python +# Delete a vector store +delete_response = await client.vector_stores.delete(vector_store_id="vs_12345678") + +if delete_response.deleted: + print(f"Vector store {delete_response.id} successfully deleted") +else: + print("Failed to delete vector store") +``` + +**Important Notes:** +- Deleting a vector store removes all files, chunks, and embeddings +- This operation cannot be undone +- The underlying vector database is also cleaned up +- Consider backing up important data before deletion + +## Search Capabilities + +### Vector Search + +Pure similarity search using embeddings: + +```python +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="machine learning algorithms", + max_num_results=10, +) +``` + +### Filtered Search + +Combine vector search with metadata 
filtering:
+
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="machine learning algorithms",
+    filters={"file_type": "pdf", "upload_date": "2024-01-01"},
+    max_num_results=10,
+)
+```
+
+### Hybrid Search
+
+[SQLite-vec](../providers/vector_io/inline_sqlite-vec.mdx), [pgvector](../providers/vector_io/remote_pgvector.mdx), and [Milvus](../providers/vector_io/inline_milvus.mdx) support combining vector and keyword search.
+
+## Performance Considerations
+
+> **Note**: For detailed performance optimization strategies, see [Performance Considerations](../providers/files/openai_file_operations_support.md#performance-considerations) in the provider documentation.
+
+**Key Points:**
+- **Chunk Size**: 400-600 tokens for precision, 800-1200 for context
+- **Storage**: Choose provider based on your performance needs
+- **Search**: Optimize for your specific use case
+
+## Error Handling
+
+> **Note**: For comprehensive troubleshooting and error handling, see [Troubleshooting](../providers/files/openai_file_operations_support.md#troubleshooting) in the provider documentation.
+
+**Common Issues:**
+- File processing failures (format, size limits)
+- Search performance optimization
+- Storage and memory issues
+
+## Best Practices
+
+> **Note**: For detailed best practices and recommendations, see [Best Practices](../providers/files/openai_file_operations_support.md#best-practices) in the provider documentation.
+
+**Key Recommendations:**
+- File organization and naming conventions
+- Chunking strategy optimization
+- Metadata and monitoring practices
+- Regular cleanup and maintenance
+
+## Integration Examples
+
+### RAG Application
+
+```python
+# Build a RAG system with file uploads
+async def build_rag_system():
+    # Create vector store
+    vector_store = client.vector_stores.create(name="knowledge_base")
+
+    # Upload and process documents
+    documents = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
+    for doc in documents:
+        with open(doc, "rb") as f:
+            file_info = await client.files.upload(file=f, purpose="assistants")
+        await client.vector_stores.files.create(
+            vector_store_id=vector_store.id, file_id=file_info.id
+        )
+
+    return vector_store
+
+
+# Query the RAG system
+async def query_rag(vector_store_id, question):
+    results = await client.vector_stores.search(
+        vector_store_id=vector_store_id, query=question, max_num_results=5
+    )
+    return results
+```
+
+### Document Analysis
+
+```python
+# Analyze document content through vector search
+async def analyze_document(vector_store_id, file_id):
+    # Get document content
+    content = await client.vector_stores.files.retrieve_content(
+        vector_store_id=vector_store_id, file_id=file_id
+    )
+
+    # Search for specific topics
+    topics = ["introduction", "methodology", "conclusion"]
+    analysis = {}
+
+    for topic in topics:
+        results = await client.vector_stores.search(
+            vector_store_id=vector_store_id, query=topic, max_num_results=3
+        )
+        analysis[topic] = results.data
+
+    return analysis
+```
+
+## Next Steps
+
+- Explore the [Files API documentation](../providers/files/files.mdx) for detailed API reference
+- Check [Vector Store Providers](../providers/vector_io/index.mdx) for specific implementation details
+- Review [Getting Started](../getting_started/quickstart.mdx) for quick setup instructions
diff --git a/docs/docs/providers/files/files.mdx b/docs/docs/providers/files/files.mdx
new file mode 100644
index 000000000..095642be3
--- /dev/null
+++ b/docs/docs/providers/files/files.mdx
@@ -0,0 +1,290 @@
+---
+sidebar_label: Files
+title: Files
+---
+
+## Overview
+
+The Files API provides file management capabilities for Llama Stack. It allows you to upload, store, retrieve, and manage files that can be used across various endpoints in your application.
+
+## Features
+
+- **File Upload**: Upload files with metadata and purpose classification
+- **File Management**: List, retrieve, and delete files
+- **Content Retrieval**: Access raw file content for processing
+- **API Compatibility**: Full compatibility with OpenAI Files API endpoints
+- **Flexible Storage**: Support for local filesystem and cloud storage backends
+
+## API Endpoints
+
+### Upload File
+
+**POST** `/v1/openai/v1/files`
+
+Upload a file that can be used across various endpoints.
+
+**Request Body:**
+- `file`: The file object to be uploaded (multipart form data)
+- `purpose`: The intended purpose of the uploaded file
+
+**Supported Purposes:**
+- `batch`: Files for batch operations
+- `fine-tune`: Files for fine-tuning
+- `assistants`: Files for assistants and vector stores
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "batch"
+}
+```
+
+**Example:**
+```python
+import requests
+
+with open("data.jsonl", "rb") as f:
+    files = {"file": f}
+    data = {"purpose": "batch"}
+    response = requests.post(
+        "http://localhost:8000/v1/openai/v1/files", files=files, data=data
+    )
+    file_info = response.json()
+```
+
+### List Files
+
+**GET** `/v1/openai/v1/files`
+
+Returns a list of files that belong to the user's organization.
+
+**Query Parameters:**
+- `after` (optional): A cursor for pagination
+- `limit` (optional): Limit on number of objects (1-10,000, default: 10,000)
+- `order` (optional): Sort order by created_at timestamp (`asc` or `desc`, default: `desc`)
+- `purpose` (optional): Filter files by purpose
+
+**Response:**
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "file-abc123",
+      "object": "file",
+      "bytes": 140,
+      "created_at": 1613779121,
+      "filename": "mydata.jsonl",
+      "purpose": "fine-tune"
+    }
+  ],
+  "has_more": false
+}
+```
+
+**Example:**
+```python
+import requests
+
+# List all files
+response = requests.get("http://localhost:8000/v1/openai/v1/files")
+files = response.json()
+
+# List files with pagination
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files",
+    params={"limit": 10, "after": "file-abc123"},
+)
+files = response.json()
+
+# Filter by purpose
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files", params={"purpose": "fine-tune"}
+)
+files = response.json()
+```
+
+### Retrieve File
+
+**GET** `/v1/openai/v1/files/{file_id}`
+
+Returns information about a specific file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "fine-tune"
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+file_info = response.json()
+```
+
+### Delete File
+
+**DELETE** `/v1/openai/v1/files/{file_id}`
+
+Delete a file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to delete
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "deleted": true
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.delete(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+result = response.json()
+```
+
+### Retrieve File Content
+
+**GET** `/v1/openai/v1/files/{file_id}/content`
+
+Returns the raw file content as a binary response.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve content from
+
+**Response:**
+Binary file content with appropriate headers:
+- `Content-Type`: `application/octet-stream`
+- `Content-Disposition`: `attachment; filename="filename"`
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}/content")
+
+# Save content to file
+with open("downloaded_file.jsonl", "wb") as f:
+    f.write(response.content)
+
+# Or process content directly
+content = response.content
+```
+
+## Vector Store Integration
+
+The Files API integrates with Vector Stores to enable document processing and search. For detailed information about this integration, see [File Operations and Vector Store Integration](../../concepts/file_operations_vector_stores.mdx).
+
+### Vector Store File Operations
+
+**List Vector Store Files:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+**Retrieve Vector Store File Content:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content`
+
+**Attach File to Vector Store:**
+- **POST** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+## Error Handling
+
+The Files API returns standard HTTP status codes and error responses:
+
+- `400 Bad Request`: Invalid request parameters
+- `404 Not Found`: File not found
+- `429 Too Many Requests`: Rate limit exceeded
+- `500 Internal Server Error`: Server error
+
+**Error Response Format:**
+```json
+{
+  "error": {
+    "message": "Error description",
+    "type": "invalid_request_error",
+    "code": "file_not_found"
+  }
+}
+```
+
+## Rate Limits
+
+The Files API implements rate limiting to ensure fair usage:
+- File uploads: 100 files per minute
+- File retrievals: 1000 requests per minute
+- File deletions: 100 requests per minute
+
+## Best Practices
+
+1. **File Organization**: Use descriptive filenames and appropriate purpose classifications
+2. **Batch Operations**: For multiple files, consider using batch endpoints when available
+3. **Error Handling**: Always check response status codes and handle errors gracefully
+4. **Content Types**: Ensure files are uploaded with appropriate content types
+5.
**Cleanup**: Regularly delete unused files to manage storage costs
+
+## Integration Examples
+
+### With Python Client
+
+```python
+from llama_stack import LlamaStackClient
+
+client = LlamaStackClient("http://localhost:8000")
+
+# Upload a file
+with open("data.jsonl", "rb") as f:
+    file_info = await client.files.upload(file=f, purpose="fine-tune")
+
+# List files
+files = await client.files.list(purpose="fine-tune")
+
+# Retrieve file content
+content = await client.files.retrieve_content(file_info.id)
+```
+
+### With cURL
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@data.jsonl" \
+  -F "purpose=fine-tune"
+
+# List files
+curl http://localhost:8000/v1/openai/v1/files
+
+# Download file content
+curl http://localhost:8000/v1/openai/v1/files/file-abc123/content \
+  -o downloaded_file.jsonl
+```
+
+## Provider Support
+
+The Files API supports multiple storage backends:
+
+- **Local Filesystem**: Store files on local disk (inline provider)
+- **S3**: Store files in AWS S3 or S3-compatible services (remote provider)
+- **Custom Backends**: Extensible architecture for custom storage providers
+
+See the [Files Providers](index.mdx) documentation for detailed configuration options.
diff --git a/docs/docs/providers/files/openai_file_operations_quick_reference.md b/docs/docs/providers/files/openai_file_operations_quick_reference.md
new file mode 100644
index 000000000..43e2318e2
--- /dev/null
+++ b/docs/docs/providers/files/openai_file_operations_quick_reference.md
@@ -0,0 +1,80 @@
+# File Operations Quick Reference
+
+## Overview
+
+As of release 0.2.14, Llama Stack provides comprehensive file operations and Vector Store API integration, following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files).
+
+> **Note**: For detailed overview and implementation details, see [Overview](openai_file_operations_support.md#overview) in the full documentation.
+
+## Supported Providers
+
+> **Note**: For complete provider details and features, see [Supported Providers](openai_file_operations_support.md#supported-providers) in the full documentation.
+
+**Inline Providers**: FAISS, SQLite-vec, Milvus
+**Remote Providers**: ChromaDB, Qdrant, Weaviate, PGVector
+
+## Quick Start
+
+### 1. Upload File
+```python
+file_info = await client.files.upload(
+    file=open("document.pdf", "rb"), purpose="assistants"
+)
+```
+
+### 2. Create Vector Store
+```python
+vector_store = client.vector_stores.create(name="my_docs")
+```
+
+### 3. Attach File
+```python
+await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 4. Search
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+## File Processing & Search
+
+**Processing**: 800 tokens default chunk size, 400 token overlap
+**Formats**: PDF, DOCX, TXT, Code files, etc.
+**Search**: Vector similarity, Hybrid (SQLite-vec, pgvector, Milvus), Filtered with metadata
+
+## Configuration
+
+> **Note**: For detailed configuration examples and options, see [Configuration Examples](openai_file_operations_support.md#configuration-examples) in the full documentation.
+
+**Basic Setup**: Configure vector_io and files providers in your run.yaml
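+
+A minimal sketch (provider IDs and paths are illustrative; see the full documentation above for complete options):
+
+```yaml
+# Example: FAISS for vectors, local filesystem for files
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+
+files:
+  - provider_id: local-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ~/.llama/files
+```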
+
+## Common Use Cases
+
+- **RAG Systems**: Document Q&A with file uploads
+- **Knowledge Bases**: Searchable document collections
+- **Content Analysis**: Document similarity and clustering
+- **Research Tools**: Literature review and analysis
+
+## Performance Tips
+
+> **Note**: For detailed performance optimization strategies, see [Performance Considerations](openai_file_operations_support.md#performance-considerations) in the full documentation.
+
+**Quick Tips**: Choose provider based on your needs (speed vs. storage vs. scalability)
+
+## Troubleshooting
+
+> **Note**: For comprehensive troubleshooting, see [Troubleshooting](openai_file_operations_support.md#troubleshooting) in the full documentation.
+
+**Quick Fixes**: Check file format compatibility, optimize chunk sizes, monitor storage
+
+## Resources
+
+- [Full Documentation](openai_file_operations_support.md)
+- [Integration Guide](../../concepts/file_operations_vector_stores.mdx)
+- [Files API](files.mdx)
+- [Provider Details](../vector_io/index.mdx)
diff --git a/docs/docs/providers/files/openai_file_operations_support.md b/docs/docs/providers/files/openai_file_operations_support.md
new file mode 100644
index 000000000..058c994da
--- /dev/null
+++ b/docs/docs/providers/files/openai_file_operations_support.md
@@ -0,0 +1,291 @@
+# File Operations Support in Vector Store Providers
+
+## Overview
+
+This document provides a comprehensive overview of file operations and Vector Store API support across all available vector store providers in Llama Stack. As of release 0.2.14, the following providers support full file operations integration.
+
+## Supported Providers
+
+### ✅ Full File Operations Support
+
+The following providers support complete file operations integration, including file upload, automatic processing, and search:
+
+#### Inline Providers (Single Node)
+
+| Provider | File Operations | Key Features |
+|----------|----------------|--------------|
+| **FAISS** | ✅ Full Support | Fast in-memory search, GPU acceleration |
+| **SQLite-vec** | ✅ Full Support | Hybrid search, disk-based storage |
+| **Milvus** | ✅ Full Support | High-performance, scalable indexing |
+
+#### Remote Providers (Hosted)
+
+| Provider | File Operations | Key Features |
+|----------|----------------|--------------|
+| **ChromaDB** | ✅ Full Support | Metadata filtering, persistent storage |
+| **Qdrant** | ✅ Full Support | Payload filtering, advanced search |
+| **Weaviate** | ✅ Full Support | GraphQL interface, schema management |
+| **Postgres (PGVector)** | ✅ Full Support | SQL integration, ACID compliance |
+
+### 🔄 Partial Support
+
+Some providers may support basic vector operations but lack full file operations integration:
+
+| Provider | Status | Notes |
+|----------|--------|-------|
+| **Meta Reference** | 🔄 Basic | Core vector operations only |
+
+## File Operations Features
+
+All supported providers offer the following file operations capabilities:
+
+### Core Functionality
+
+- **File Upload & Processing**: Automatic document ingestion and chunking
+- **Vector Storage**: Embedding generation and storage
+- **Search & Retrieval**: Semantic search with metadata filtering
+- **File Management**: List, retrieve, and manage files in vector stores
+
+### Advanced Features
+
+- **Automatic Chunking**: Configurable chunk sizes and overlap
+- **Metadata Preservation**: File attributes and chunk metadata
+- **Status Tracking**: Monitor file processing
+
+## Implementation Details
+
+### File Processing Pipeline
+
+1. **Upload**: File uploaded via the Files API
+2. **Extraction**: Text content extracted from various formats
+3. **Chunking**: Content split into optimal chunks (default: 800 tokens)
+4. **Embedding**: Chunks converted to vector embeddings
+5. **Storage**: Vectors stored with metadata in the vector database
+6. **Indexing**: Search index updated for fast retrieval
+
+### Supported File Formats
+
+- **Documents**: PDF, DOCX, DOC
+- **Text**: TXT, MD, RST
+- **Code**: Python, JavaScript, Java, C++, etc.
+- **Data**: JSON, CSV, XML
+- **Web**: HTML files
+
+### Chunking Strategies
+
+- **Default**: 800 tokens with 400-token overlap
+- **Custom**: Configurable chunk sizes and overlap
+- **Static**: Fixed-size chunks with overlap
+
+## Provider-Specific Features
+
+### FAISS
+
+- **Storage**: In-memory with optional persistence
+- **Performance**: Optimized for speed and GPU acceleration
+- **Use Case**: High-performance, memory-constrained environments
+
+### SQLite-vec
+
+- **Storage**: Disk-based with SQLite backend
+- **Search**: Hybrid vector + keyword search
+- **Use Case**: Large document collections, frequent updates
+
+### Milvus
+
+- **Storage**: Scalable distributed storage
+- **Indexing**: Multiple index types (IVF, HNSW)
+- **Use Case**: Production deployments, large-scale applications
+
+### ChromaDB
+
+- **Storage**: Persistent storage with metadata
+- **Filtering**: Advanced metadata filtering
+- **Use Case**: Applications requiring rich metadata
+
+### Qdrant
+
+- **Storage**: High-performance vector database
+- **Filtering**: Payload-based filtering
+- **Use Case**: Real-time applications, complex queries
+
+### Weaviate
+
+- **Storage**: GraphQL-native vector database
+- **Schema**: Flexible schema management
+- **Use Case**: Applications requiring complex data relationships
+
+### Postgres (PGVector)
+
+- **Storage**: SQL database with vector extensions
+- **Integration**: ACID compliance, existing SQL workflows
+- **Use Case**: Applications requiring transactional guarantees
+
+## Configuration Examples
+
+### Basic Configuration
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+```
+
+### With FileResponse Support
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+
+files:
+  - provider_id: local-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ~/.llama/files
+      metadata_store:
+        type: sqlite
+        db_path: ~/.llama/files_metadata.db
+```
+
+## Usage Examples
+
+### Python Client
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8000")
+
+# Create vector store
+vector_store = client.vector_stores.create(name="documents")
+
+# Upload and process file
+with open("document.pdf", "rb") as f:
+    file_info = client.files.upload(file=f, purpose="assistants")
+
+# Attach to vector store
+client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+# Search
+results = client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+### cURL Commands
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@document.pdf" \
+  -F "purpose=assistants"
+
+# Create vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores \
+  -H "Content-Type: application/json" \
+  -d '{"name": "documents"}'
+
+# Attach file to vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/files \
+  -H "Content-Type: application/json" \
+  -d '{"file_id": "file-abc123"}'
+
+# Search vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/search \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is the main topic?", "max_num_results": 5}'
+```
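+
+Before searching, it can be useful to confirm that an attached file finished processing. A sketch follows; the endpoint shape mirrors the attach call above and follows the OpenAI Vector Store Files API, and `{store_id}` and `{file_id}` are placeholders:
+
+```bash
+# Retrieve the file attachment; its "status" field reports in_progress, completed, or failed
+curl http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/files/{file_id}
+```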
+
+## Performance Considerations
+
+### Chunk Size Optimization
+
+- **Small chunks (400-600 tokens)**: Better precision, more results
+- **Large chunks (800-1200 tokens)**: Better context, fewer results
+- **Overlap (50%)**: Maintains context between chunks
+
+### Storage Efficiency
+
+- **FAISS**: Fastest, but memory-limited
+- **SQLite-vec**: Good balance of performance and storage
+- **Milvus**: Scalable, production-ready
+- **Remote providers**: Managed, but network-dependent
+
+### Search Performance
+
+- **Vector search**: Fastest for semantic queries
+- **Hybrid search**: Best accuracy (SQLite-vec only)
+- **Filtered search**: Fast with metadata constraints
+
+## Troubleshooting
+
+### Common Issues
+
+1. **File Processing Failures**
+   - Check file format compatibility
+   - Verify file size limits
+   - Review error messages in file status
+
+2. **Search Performance**
+   - Optimize chunk sizes for your use case
+   - Use filters to narrow search scope
+   - Monitor vector store metrics
+
+3. **Storage Issues**
+   - Check available disk space
+   - Verify database permissions
+   - Monitor memory usage (for in-memory providers)
+
+### Monitoring
+
+```python
+# Check file processing status
+file_status = client.vector_stores.files.retrieve(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+if file_status.status == "failed":
+    print(f"Error: {file_status.last_error.message}")
+
+# Monitor vector store health
+health = client.vector_stores.health(vector_store_id=vector_store.id)
+print(f"Status: {health.status}")
+```
+
+## Best Practices
+
+1. **File Organization**: Use descriptive names and organize by purpose
+2. **Chunking Strategy**: Test different sizes for your specific use case
+3. **Metadata**: Add relevant attributes for better filtering
+4. **Monitoring**: Track processing status and search performance
+5. **Cleanup**: Regularly remove unused files to manage storage (see the sketch below)
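+
+A sketch of the cleanup practice in item 5, assuming the same synchronous `client` as the usage examples above and OpenAI-style delete endpoints:
+
+```python
+# Detach the file from the vector store, then delete the underlying file object
+client.vector_stores.files.delete(vector_store_id=vector_store.id, file_id=file_info.id)
+client.files.delete(file_info.id)
+```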
+
+## Future Enhancements
+
+Planned improvements for file operations support:
+
+- **Batch Processing**: Process multiple files simultaneously
+- **Advanced Chunking**: More sophisticated chunking algorithms
+- **Custom Embeddings**: Support for custom embedding models
+- **Real-time Updates**: Live file processing and indexing
+- **Multi-format Support**: Enhanced file format support
+
+## Support and Resources
+
+- **Documentation**: [File Operations and Vector Store Integration](../../concepts/file_operations_vector_stores.mdx)
+- **API Reference**: [Files API](files_api.md)
+- **Provider Docs**: [Vector Store Providers](../vector_io/index.md)
+- **Examples**: [Getting Started](../getting_started/index.md)
+- **Community**: [GitHub Discussions](https://github.com/meta-llama/llama-stack/discussions)
diff --git a/docs/docs/providers/index.mdx b/docs/docs/providers/index.mdx
index bfc16b29a..5c81a57ed 100644
--- a/docs/docs/providers/index.mdx
+++ b/docs/docs/providers/index.mdx
@@ -22,6 +22,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 ## Provider Categories
 
 - **[External Providers](external/index.mdx)** - Guide for building and using external providers
+- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility layer
 - **[Inference](inference/index.mdx)** - LLM and embedding model providers
 - **[Agents](agents/index.mdx)** - Agentic system providers
 - **[DatasetIO](datasetio/index.mdx)** - Dataset and data loader providers
@@ -30,6 +31,16 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 - **[Tool Runtime](tool_runtime/index.mdx)** - Tool and protocol providers
 - **[Files](files/index.mdx)** - File system and storage providers
 
-## Other information about Providers
-- **[OpenAI Compatibility](./openai.mdx)** - OpenAI API compatibility layer
+## API Documentation
+
+For comprehensive API documentation and reference:
+
+- **[API Reference](../api/index.mdx)** - Complete API documentation
+- **[Experimental APIs](../api-experimental/index.mdx)** - APIs in development
+- **[Deprecated APIs](../api-deprecated/index.mdx)** - Legacy APIs being phased out
+- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility guide
+
+## Additional Provider Information
+
+- **[OpenAI Implementation Guide](./openai.mdx)** - Code examples and implementation details for OpenAI APIs
 - **[OpenAI-Compatible Responses Limitations](./openai_responses_limitations.mdx)** - Known limitations of the Responses API in Llama Stack
diff --git a/docs/docs/providers/openai.mdx b/docs/docs/providers/openai.mdx
index 84436e769..c3bb46ecf 100644
--- a/docs/docs/providers/openai.mdx
+++ b/docs/docs/providers/openai.mdx
@@ -1,9 +1,14 @@
 ---
-title: OpenAI Compatibility
-description: OpenAI API Compatibility
-sidebar_label: OpenAI Compatibility
-sidebar_position: 1
+title: OpenAI Implementation Guide
+description: Code examples and implementation details for OpenAI API compatibility
+sidebar_label: OpenAI Implementation
+sidebar_position: 2
 ---
+
+# OpenAI Implementation Guide
+
+This guide provides detailed code examples and implementation details for using OpenAI-compatible APIs with Llama Stack. For a comprehensive overview of OpenAI compatibility features, see our [OpenAI API Compatibility Guide](../api-openai/index.mdx).
+
 ## OpenAI API Compatibility
 
 ### Server path
@@ -195,3 +200,9 @@ Lines of code unfurl
 Logic whispers in the dark
 Art in hidden form
 ```
+
+## Additional Resources
+
+- **[OpenAI API Compatibility Guide](../api-openai/index.mdx)** - Comprehensive overview of OpenAI compatibility features
+- **[OpenAI Responses API Limitations](./openai_responses_limitations.mdx)** - Detailed limitations and known issues
+- **[Provider Documentation](../index.mdx)** - Complete provider ecosystem overview

From 1e81056a22c0fe2607428da31a4e0c8acf7c27da Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 13 Nov 2025 07:23:23 -0800
Subject: [PATCH 20/62] feat(tests): enable MCP tests in server mode (#4146)

We would like to run all OpenAI compatibility tests using only the
openai client library. This is most friendly for contributors since
they can run tests without needing to update the client-sdks (which is
getting easier but is still a long pole). This is the first step in
enabling that -- not using the "library client" for any of the
Responses tests.

This seems like a reasonable trade-off since the usage of an embeddable
library client for Responses (or any OpenAI-compatible) behavior seems
to be not very common.

To do this, we needed to enable MCP tests (which only worked in library
client mode) for server mode.
---
 scripts/integration-tests.sh                  |  16 +
 tests/common/mcp.py                           |  10 +-
 tests/integration/responses/conftest.py       |  17 +
 ...9d940455cb083c0fd1330c666a12d74df6f89.json | 549 ++++++++++++
 ...bd9f35d82223c7d1cab613ab2e818d79d6f9b.json | 295 +++++++
 ...27118869d34d768ad87ba072e92e8a43a52f2.json | 833 ++++++++++++++++++
 ...ed6791b1054ce0f36e967eb3793b5608344f3.json | 759 ++++++++++++++++
 ...6f50e862aeddbbeaeb256ef1add34de7c1dc8.json | 549 ++++++++++++
 ...f53ec795fd77ef818827e16691689151bf17c.json | 413 +++++++++
 ...f77eb5d0989d312e929ed59dda07738487d09.json | 586 ++++++++++++
 ...2ff7145784d249c3216c34299c38c28118328.json | 524 +++++++++++
 ...803c4a397f772ad8b1cb90ec44527ce964a45.json | 614 +++++++++++++
 ...18a218bb7f4b8363998abc34ec9bb7ba3a03d.json | 574 ++++++++++++
 ...5fe3ff21e6c39189ab93778335439f288158f.json | 771 ++++++++++++++++
 ...520db560af78e9bc38159e526b68b8daa168e.json | 759 ++++++++++++++++
 ...981ca011dd1b6c29df530d12726b1cf7989e5.json | 833 ++++++++++++++++++
 ...ee40546a0658db3df58b9b4d948e4e95b0961.json | 524 +++++++++++
 ...a91ebca1cbaeb4f7aab22c5b9e246b476272f.json | 649 ++++++++++++++
 ...c53deb1ac47d064a1b5c70a78b7436438818f.json | 450 ++++++++++
 ...d054d5f5dd6bdd3c4333db6cef7361fb32feb.json | 759 ++++++++++++++++
 ...2a453cb8f2e11e80beb8e5506439345c428eb.json | 808 +++++++++++++++++
 ...5fae1f1eb09efe6e4f86c115a78a3db5a59bc.json | 668 ++++++++++++++
 ...34cf2f55727b67c1e1854a106b9d8c7c64b70.json | 700 +++++++++++++++
 ...a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json | 641 ++++++++++++++
 .../responses/test_basic_responses.py         |  30 +-
 .../responses/test_conversation_responses.py  |  12 +-
 tests/integration/responses/test_file_search.py |  50 +-
 .../responses/test_tool_responses.py          | 117 +--
 tests/integration/tool_runtime/test_mcp.py    |   5 -
 29 files changed, 13388 insertions(+), 127 deletions(-)
 create mode 100644 tests/integration/responses/conftest.py
 create mode 100644 tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json
 create mode 100644 tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json
 create mode 100644 tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json
 create mode 100644 tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json
 create mode 100644 tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json
 create mode 100644 tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json
 create mode 100644 tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json
 create mode 100644 tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json
 create mode 100644 tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json
 create mode 100644 tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json
 create mode 100644 tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json
 create mode 100644 tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json
 create mode 100644 tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json
 create mode 100644 tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json
 create mode 100644 tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json
 create mode 100644 tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json
 create mode 100644 tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json
 create mode 100644 tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json
 create mode 100644 tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json
 create mode 100644 tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json
 create mode 100644 tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json

diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index 0951feb14..8b0002125 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -162,6 +162,17 @@ if [[ "$COLLECT_ONLY" == false ]]; then
     export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="library_client"
     echo "Setting stack config type: library_client"
   fi
+
+  # Set MCP host for in-process MCP server tests
+  # - For library client and server mode: localhost (both on same host)
+  # - For docker mode: host.docker.internal (container needs to reach host)
+  if [[ "$STACK_CONFIG" == docker:* ]]; then
+    export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
+    echo "Setting MCP host: host.docker.internal (docker mode)"
+  else
+    export LLAMA_STACK_TEST_MCP_HOST="localhost"
+    echo "Setting MCP host: localhost (library/server mode)"
+  fi
 fi
 
 SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash)
python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash) @@ -338,6 +349,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then DOCKER_ENV_VARS="" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_MCP_HOST=${LLAMA_STACK_TEST_MCP_HOST:-host.docker.internal}" # Disabled: https://github.com/llamastack/llama-stack/issues/4089 #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200" @@ -371,8 +383,11 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then # Use regular port mapping instead NETWORK_MODE="" PORT_MAPPINGS="" + ADD_HOST_FLAG="" if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then NETWORK_MODE="--network host" + # On Linux with host network, also add host.docker.internal mapping for consistency + ADD_HOST_FLAG="--add-host=host.docker.internal:host-gateway" else # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT" @@ -381,6 +396,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker run -d $NETWORK_MODE --name "$container_name" \ $PORT_MAPPINGS \ + $ADD_HOST_FLAG \ $DOCKER_ENV_VARS \ "$IMAGE_NAME" \ --port $LLAMA_STACK_PORT diff --git a/tests/common/mcp.py b/tests/common/mcp.py index 644becd2d..085575ec0 100644 --- a/tests/common/mcp.py +++ b/tests/common/mcp.py @@ -244,8 +244,14 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal timeout = 2 start_time = time.time() - server_url = f"http://localhost:{port}/sse" - logger.debug(f"Waiting for MCP server thread to start on port {port}") + # Determine the appropriate host for the server URL based on test environment + # - For library client and server mode: use localhost (both on same host) + # - For docker mode: use host.docker.internal (container needs to reach host) + import os + + mcp_host = os.environ.get("LLAMA_STACK_TEST_MCP_HOST", "localhost") + server_url = f"http://{mcp_host}:{port}/sse" + logger.debug(f"Waiting for MCP server thread to start on port {port} (accessible via {mcp_host})") while time.time() - start_time < timeout: if server_thread.is_alive(): diff --git a/tests/integration/responses/conftest.py b/tests/integration/responses/conftest.py new file mode 100644 index 000000000..c29575072 --- /dev/null +++ b/tests/integration/responses/conftest.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + + +@pytest.fixture +def responses_client(compat_client): + """Provide a client for responses tests, skipping library client mode.""" + if isinstance(compat_client, LlamaStackAsLibraryClient): + pytest.skip("Responses API tests are not supported in library client mode") + return compat_client diff --git a/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json b/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json new file mode 100644 index 000000000..9b432130b --- /dev/null +++ b/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json @@ -0,0 +1,549 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"chemical_reaction\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "content": [ + { + "type": "text", + "text": "exp_003" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_yTMuQEKu7x115q8XvhqelRub", + "function": { + "arguments": "", + "name": "get_experiment_results" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9CSOZwfG5M7nid" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Wss" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": null, + "obfuscation": "5AmVsa0S6NBy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2Sf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "exp", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "leu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "omxpR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "003", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kW6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": 
null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Zm6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aXvC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 457, + "total_tokens": 476, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "s13YHOCCaCDcJ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json b/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json new file mode 100644 index 000000000..5aebcd841 --- /dev/null +++ b/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json @@ -0,0 +1,295 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"alice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "content": [ + { + "type": "text", + "text": "user_12345" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "type": "function", + "function": { + "name": "check_file_access", + "arguments": "{\"user_id\":\"user_12345\",\"filename\":\"document.txt\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "content": [ + { + "type": "text", + "text": "yes" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UxHf8fChwO3CUY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": "yes", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GOexNEhopELIg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "O41d8hC8zD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 516, + "total_tokens": 518, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9VQklZAZMYAfa0" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json b/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json new file mode 100644 index 000000000..c39483a7c --- /dev/null +++ b/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json @@ -0,0 +1,833 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + 
"headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "type": "function", + "function": { + "name": "get_user_permissions", + "arguments": "{\"user_id\":\"user_11111\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "content": [ + { + "type": "text", + "text": "admin" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "type": "function", + "function": { + "name": "check_file_access", + "arguments": "{\"user_id\":\"user_11111\",\"filename\":\"secret_file.txt\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "content": [ + { + "type": "text", + "text": "no" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. 
This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WLGSIGDbuImIc2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "tOPrT8GpCzqCn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ViOvVDT7owF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EkiYJGYtRb2KCr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ioC2G58DuWTx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "A5rxByl55APwi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kmDNWRqOyy2r3ST" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " cannot", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JHGD4XKFC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " access", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6IPkFhs93" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LGHjKnVq2lF1DS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "secret", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1nGoXVjnK0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": 
[ + { + "delta": { + "content": "_file", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OeR7YlvZQLa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ".txt", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yLKHaSgjE64R" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "'.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "waZY1Js7DPWtoN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "km3Gr5HspErW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " final", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Mvzf8AUstX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " result", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "660CrCPne" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lq7NyKvIo8UEO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ":", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qjIz07y1RQsKqTo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " no", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xhcVwxM4RaQcN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dPxBJZ3WUesIy8T" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z9wFfcEaK2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 21, + "prompt_tokens": 542, + "total_tokens": 563, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "fSoZk1lrb3nJt" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json b/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json new file mode 100644 index 000000000..d86ca8cc9 --- /dev/null +++ b/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json @@ -0,0 +1,759 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-True-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_bL84OWNnE1s75GJEqGLAK35W", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ptE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UEV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hMko" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", 
+ "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aLLC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EZdr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0bj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5J" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7dZEY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AqP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "X8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oa7h2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1Is8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "DfwHMdbjUVww7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json b/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json new file mode 100644 index 000000000..025246ebe --- /dev/null +++ b/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json @@ -0,0 +1,549 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"boiling_point\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_skNUKbERbtdoADH834U9OE91", + "function": { + "arguments": "", + "name": "get_experiment_results" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5aHvu2xes6Amy8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9HQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ckAh5OXg9JIe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "avh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, 
+ "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "exp", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "f75" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nini1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "004", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MXB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Vc4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "rnph" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 450, + "total_tokens": 469, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "nUptVmnQlQZrH" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json b/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json new file mode 100644 index 000000000..b26cd985e --- /dev/null +++ b/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json @@ -0,0 +1,413 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. 
This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ma7aiZxSs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DXu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "username", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "rtfrl7gxu80vmN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": 
"fp_cbf1785567", + "usage": null, + "obfuscation": "r" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "alice", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vSu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sXfh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 15, + "prompt_tokens": 454, + "total_tokens": 469, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "bEe7hWJ6U62YQ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json b/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json new file mode 100644 index 000000000..fef5f0a62 --- /dev/null +++ b/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json @@ -0,0 +1,586 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check 
if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "function": { + "arguments": "", + "name": "get_user_permissions" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "00p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Y0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i2I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QY61l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "111", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "YAZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "11", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nw7U" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ev7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ 
+ { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CSaD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 478, + "total_tokens": 497, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "kMNEyeKFT75vK" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json b/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json new file mode 100644 index 000000000..6b7e5bc49 --- /dev/null +++ b/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json @@ -0,0 +1,524 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "W3B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L7n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + 
"obfuscation": "lXUc0FKJkRea" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "bo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3dUQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "iling", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_point", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": 
{ + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "48i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "eQyU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 418, + "total_tokens": 437, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "5tVrc5IEigum8" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json b/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json new file mode 100644 index 000000000..adae894b3 --- /dev/null +++ b/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_8kf8fNIDcWOelbCmUEcretON", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_8kf8fNIDcWOelbCmUEcretON", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QvigjcdULEdran" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sIHyVud88f1Ri" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L46IcJeM" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "j0afpRCRBL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "tuzBzZB7jURPj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "iq6vUNVBRuRH5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nkkz9uUPfhHdqZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oR3PEQpsXLwYOJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VBFf1ewix1rj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yEx3rYoaZjsTw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ 
+ { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "I6VR8wzPmnpa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xld69F07KIb2Yc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GKgtQZJiWLVKj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1by4tgiJqNgaI1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2RdP6HDQApUpN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "21ABialEpJBCcX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "uoaaRgmiGLD815k" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QKEKTjUUam" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 195, + "total_tokens": 212, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ceWQr6uzZRuj3" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json b/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json new file mode 100644 index 000000000..997e18bec --- /dev/null +++ b/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-False-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_4ldOwO71od1E0lrdgYQCoe2e", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TdV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L5f" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qo3z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QdX5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sJYi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Yk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pnS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "y5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Tjs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cx0I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 156, + "total_tokens": 178, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "bmRrd4XLuhmCv" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json b/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json new file mode 100644 index 000000000..53f1a8125 --- /dev/null +++ b/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json @@ -0,0 +1,771 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"alice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "content": [ + { + "type": "text", + "text": "user_12345" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "function": { + "arguments": "", + "name": "check_file_access" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sCU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "iHp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4hG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WRFf5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "123", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PvE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "45", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xak8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "v" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ 
+ { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "filename", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "l7Rfy5le49BJu0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "document", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EpFPZH128OUIsw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ".txt", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Zg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "jH3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": 
"default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UubI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 24, + "prompt_tokens": 482, + "total_tokens": 506, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "GITY7sf69sAJd" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json b/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json new file mode 100644 index 000000000..5c9d6ee91 --- /dev/null +++ b/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json @@ -0,0 +1,759 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AhH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { 
+ "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SMa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fBD0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + 
], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ySpU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fra1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Hb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "INi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "jF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2dDeK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DSb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9boiy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ZZRa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "HoutUcx6gZI1g" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json b/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json new file mode 100644 index 000000000..3ba6af144 --- /dev/null +++ b/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json @@ -0,0 +1,833 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "type": "function", + "function": { + "name": "get_user_permissions", + "arguments": "{\"user_id\":\"user_11111\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "content": [ + { + "type": "text", + "text": "admin" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "function": { + "arguments": "", + "name": "check_file_access" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xpc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xXs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HbC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "f" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ds" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Osfy3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "111", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ioI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "11", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GQg6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "filename", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "b2qqKbGC68nHMB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "H" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "secret", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + 
"delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_file", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ".txt", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Wz" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ImW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nRAE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 25, + "prompt_tokens": 507, + "total_tokens": 532, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "rgbYyZ54cN8La" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json b/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json new file mode 100644 index 000000000..80cce1358 --- /dev/null +++ b/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json @@ -0,0 +1,524 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "c8d" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QoE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1krtmewG8p36" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "chemical", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FoS4ov7pi99K5h" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_re", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BhD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "action", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KWC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PFmv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 425, + "total_tokens": 444, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "NYdC3zepOXLsO" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json b/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json new file mode 100644 index 000000000..040998a3b --- /dev/null +++ b/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json @@ -0,0 +1,649 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + }, + { + "role": "assistant", + "content": "The boiling point of \"myawesomeliquid\" is -100 degrees Celsius." + }, + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7S5XpbMeFTTZba" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "G4KYajpQCgm5p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "krw8d3Np" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sOEsvVtCEV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5eAw89OUrx7VT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PFghmTocqCYea" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IRJRbKIoXwNh0e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wuoL6MoA21KfMP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DLRS3D5YVekk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PQZQlOncwl01F" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TVfNNxYtZgXQ" + } + }, + { + 
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LscPqJGnbMf6Qw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "X8NSrxHcpYYXL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5nfdb4DuFapoeT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "K2qXQYFAd591w" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "b0rvHdF1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kFoGt52c" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 
0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SJjhJwz2zgz693C" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MityMxFgBz" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 18, + "prompt_tokens": 234, + "total_tokens": 252, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "qf0j6dzuNPifV" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json b/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json new file mode 100644 index 000000000..c79ed1010 --- /dev/null +++ b/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json @@ -0,0 +1,450 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sOa6fZEKZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HBO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "username", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7kcXlaglccmA8a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "char", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "lie", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "d2e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fhE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", 
+ "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SlsZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 16, + "prompt_tokens": 449, + "total_tokens": 465, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "fjMWRTbF1Ni06" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json b/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json new file mode 100644 index 000000000..a41104fd5 --- /dev/null +++ b/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json @@ -0,0 +1,759 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_8kf8fNIDcWOelbCmUEcretON", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1xG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RQj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XncI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "86" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lnSu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ksr1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hrv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "K9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LKw52" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yGY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8fF8B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bbwp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "k0bo4JwUfLNKW" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json b/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json new file mode 100644 index 000000000..610fe96b1 --- /dev/null +++ b/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json @@ -0,0 +1,808 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"chemical_reaction\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "content": [ + { + "type": "text", + "text": "exp_003" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_yTMuQEKu7x115q8XvhqelRub", + "type": "function", + "function": { + "name": "get_experiment_results", + "arguments": "{\"experiment_id\":\"exp_003\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_yTMuQEKu7x115q8XvhqelRub", + "content": [ + { + "type": "text", + "text": "Yield: 85%, Status: Complete" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7yA3503fehs27D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "T95BeWrgJQMHt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " yield", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VveNEnHuMQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KupSssWahehO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ogot8KLW0IXw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dYKJ6jPstuAso4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "chemical", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wcSKhZVd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "_re", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6ZlTlRGLyclHo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "action", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WpYqOmrhXr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qUhq7HrrwdFEyuY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WWO2y" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pFVMO1BRN37n4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TtQlcHeU2mPl830" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "85", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zyw8OdA0pXZCp5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "%,", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VcHVTGGXrqvev1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FI9FAA2rX6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cc65gPYGA6Xfd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " above", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "T7BlLMIQGs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2oKThCybRdG8MzZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "80", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QHWdJWXK6hzQVS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "%.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lJnplmQYyl0SL3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "NPaAVrOB4J" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [], 
+ "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 21, + "prompt_tokens": 494, + "total_tokens": 515, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ngidabPDDHECm" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json b/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json new file mode 100644 index 000000000..ce771f24e --- /dev/null +++ b/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json @@ -0,0 +1,668 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-True-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_bL84OWNnE1s75GJEqGLAK35W", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_bL84OWNnE1s75GJEqGLAK35W", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "STnb1nbwTsG4JZ" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aEUUYMIYjnZpH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2QzI8Zau" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "gZw7vp0bnu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TYru3DcfZVc6B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h5P3cluszFa21" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ggSDGSgtWOR3d9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lm72CS5Lt7lW76" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fKXRsLB1CG0e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JxZBNjkfyXquH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "egtKHFRBAqZn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "R7MdHaS5Rj2mMV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LydsYLrAIj6PU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4MmAUDk0" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ivlu4M0VfRH8b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OfTmU32oCtMsuo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IUbbHa5oyIPjr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "llluAF0LBNJIwi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LnUC3LPx43OfUbC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ULfebGmmMn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": { + "completion_tokens": 19, + "prompt_tokens": 195, + "total_tokens": 214, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "w11BVXjZVXRtg" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json b/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json new file mode 100644 index 000000000..f8472055f --- /dev/null +++ b/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json @@ -0,0 +1,700 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"boiling_point\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_skNUKbERbtdoADH834U9OE91", + "type": "function", + "function": { + "name": "get_experiment_results", + "arguments": "{\"experiment_id\":\"exp_004\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_skNUKbERbtdoADH834U9OE91", + "content": [ + { + "type": "text", + "text": "Boiling Point: 100\u00b0C, Status: Verified" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OzNg5nfMI5VouN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EBvjjqFPfytPb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HhEiLgKg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hLc2aAgg1D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "q3AsmJJ6Rvyt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4QJrcjxcuFLd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BQQJ8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nj2SOixVU5KocZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ookLm9qkLqQQ3M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "J4axWnSRvQU" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QG6jvQWF8t" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "veUGdbLd3d8r2yU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ZOCkbhGksYmsF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fbNuaYkAA8gREQ7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3rdZxDq7QoXcl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "upjHViB9dUBWAd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hBZNqRjyLGCIMjg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PrtgvDwRZp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 490, + "total_tokens": 507, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "euYYBnLE4Mj0Z" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json b/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json new file mode 100644 index 000000000..d8d87a16e --- /dev/null +++ b/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json @@ -0,0 +1,641 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WGXCgkwfwMDUCG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pkdvw6gGNrtXN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RO5YJeZc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "riZZHSDEz0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1zjk8zIdt2Y2b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XGHv0dlif7IrC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ii2KeTyV3U0uiU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3OyYvSytdOYhpT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zCnXbjW4JE6l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0bwcz2K91q7EO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ 
+ { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Um0jFlJegpXI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4OllZlS2JmoD3l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x4jApO80AyXpX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wq0D3Wzc1l3h6S" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Dn78V58iZ9wKK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fjHDBTqT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cnp6KULL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "grbygHexDT4JwGx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "upSRpiQQKE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 18, + "prompt_tokens": 195, + "total_tokens": 213, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "psE6Es6zZ2Kz4" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/test_basic_responses.py b/tests/integration/responses/test_basic_responses.py index a764084af..d72a43375 100644 --- a/tests/integration/responses/test_basic_responses.py +++ b/tests/integration/responses/test_basic_responses.py @@ -13,8 +13,8 @@ from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", basic_test_cases) -def test_response_non_streaming_basic(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_basic(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=False, @@ -31,10 +31,10 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case): "Total tokens should equal input + output tokens" ) - retrieved_response = compat_client.responses.retrieve(response_id=response.id) + retrieved_response = responses_client.responses.retrieve(response_id=response.id) assert retrieved_response.output_text == response.output_text - next_response = compat_client.responses.create( + next_response = responses_client.responses.create( model=text_model_id, input="Repeat your previous response in all caps.", previous_response_id=response.id, @@ -44,8 +44,8 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case): @pytest.mark.parametrize("case", basic_test_cases) -def test_response_streaming_basic(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_streaming_basic(responses_client, text_model_id, case): + 
response = responses_client.responses.create( model=text_model_id, input=case.input, stream=True, @@ -98,15 +98,15 @@ def test_response_streaming_basic(compat_client, text_model_id, case): validator.assert_response_consistency() # Verify stored response matches streamed response - retrieved_response = compat_client.responses.retrieve(response_id=response_id) + retrieved_response = responses_client.responses.retrieve(response_id=response_id) final_event = events[-1] assert retrieved_response.output_text == final_event.response.output_text @pytest.mark.parametrize("case", basic_test_cases) -def test_response_streaming_incremental_content(compat_client, text_model_id, case): +def test_response_streaming_incremental_content(responses_client, text_model_id, case): """Test that streaming actually delivers content incrementally, not just at the end.""" - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=True, @@ -170,10 +170,10 @@ def test_response_streaming_incremental_content(compat_client, text_model_id, ca @pytest.mark.parametrize("case", multi_turn_test_cases) -def test_response_non_streaming_multi_turn(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn(responses_client, text_model_id, case): previous_response_id = None for turn_input, turn_expected in case.turns: - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=turn_input, previous_response_id=previous_response_id, @@ -184,8 +184,8 @@ def test_response_non_streaming_multi_turn(compat_client, text_model_id, case): @pytest.mark.parametrize("case", image_test_cases) -def test_response_non_streaming_image(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_image(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=False, @@ -195,10 +195,10 @@ def test_response_non_streaming_image(compat_client, text_model_id, case): @pytest.mark.parametrize("case", multi_turn_image_test_cases) -def test_response_non_streaming_multi_turn_image(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn_image(responses_client, text_model_id, case): previous_response_id = None for turn_input, turn_expected in case.turns: - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=turn_input, previous_response_id=previous_response_id, diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index babb77793..bbd861e0d 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ b/tests/integration/responses/test_conversation_responses.py @@ -131,18 +131,18 @@ class TestConversationResponses: assert len(response.output_text.strip()) > 0 # this is not ready yet - # def test_conversation_compat_client(self, compat_client, text_model_id): + # def test_conversation_compat_client(self, responses_client, text_model_id): # """Test conversation parameter works with compatibility client.""" - # if not hasattr(compat_client, "conversations"): - # pytest.skip("compat_client does not support conversations API") + # if not hasattr(responses_client, "conversations"): + # pytest.skip("responses_client does not support conversations API") # - # conversation = 
compat_client.conversations.create() - # response = compat_client.responses.create( + # conversation = responses_client.conversations.create() + # response = responses_client.responses.create( # model=text_model_id, input="Tell me a joke", conversation=conversation.id # ) # # assert response is not None # assert len(response.output_text.strip()) > 0 # - # conversation_items = compat_client.conversations.items.list(conversation.id) + # conversation_items = responses_client.conversations.items.list(conversation.id) # assert len(conversation_items.data) >= 2 diff --git a/tests/integration/responses/test_file_search.py b/tests/integration/responses/test_file_search.py index dde5fd7f6..b2a634fb0 100644 --- a/tests/integration/responses/test_file_search.py +++ b/tests/integration/responses/test_file_search.py @@ -9,8 +9,6 @@ import time import pytest -from llama_stack.core.library_client import LlamaStackAsLibraryClient - from .helpers import new_vector_store, upload_file @@ -28,12 +26,9 @@ from .helpers import new_vector_store, upload_file }, ], ) -def test_response_text_format(compat_client, text_model_id, text_format): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API text format is not yet supported in library client.") - +def test_response_text_format(responses_client, text_model_id, text_format): stream = False - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What is the capital of France?", stream=stream, @@ -47,13 +42,10 @@ def test_response_text_format(compat_client, text_model_id, text_format): @pytest.fixture -def vector_store_with_filtered_files(compat_client, embedding_model_id, embedding_dimension, tmp_path_factory): +def vector_store_with_filtered_files(responses_client, embedding_model_id, embedding_dimension, tmp_path_factory): # """Create a vector store with multiple files that have different attributes for filtering tests.""" - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("upload_file() is not yet supported in library client somehow?") - vector_store = new_vector_store( - compat_client, "test_vector_store_with_filters", embedding_model_id, embedding_dimension + responses_client, "test_vector_store_with_filters", embedding_model_id, embedding_dimension ) tmp_path = tmp_path_factory.mktemp("filter_test_files") @@ -104,11 +96,11 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin file_path.write_text(file_data["content"]) # Upload file - file_response = upload_file(compat_client, file_data["name"], str(file_path)) + file_response = upload_file(responses_client, file_data["name"], str(file_path)) file_ids.append(file_response.id) # Attach file to vector store with attributes - file_attach_response = compat_client.vector_stores.files.create( + file_attach_response = responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, attributes=file_data["attributes"], @@ -117,7 +109,7 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin # Wait for attachment while file_attach_response.status == "in_progress": time.sleep(0.1) - file_attach_response = compat_client.vector_stores.files.retrieve( + file_attach_response = responses_client.vector_stores.files.retrieve( vector_store_id=vector_store.id, file_id=file_response.id, ) @@ -127,17 +119,17 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin # Cleanup: delete vector 
store and files try: - compat_client.vector_stores.delete(vector_store_id=vector_store.id) + responses_client.vector_stores.delete(vector_store_id=vector_store.id) for file_id in file_ids: try: - compat_client.files.delete(file_id=file_id) + responses_client.files.delete(file_id=file_id) except Exception: pass # File might already be deleted except Exception: pass # Best effort cleanup -def test_response_file_search_filter_by_region(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_region(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with region equality filter.""" tools = [ { @@ -147,7 +139,7 @@ def test_response_file_search_filter_by_region(compat_client, text_model_id, vec } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What are the updates from the US region?", tools=tools, @@ -168,7 +160,7 @@ def test_response_file_search_filter_by_region(compat_client, text_model_id, vec assert "asia" not in result.text.lower() -def test_response_file_search_filter_by_category(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_category(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with category equality filter.""" tools = [ { @@ -178,7 +170,7 @@ def test_response_file_search_filter_by_category(compat_client, text_model_id, v } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="Show me all marketing reports", tools=tools, @@ -198,7 +190,7 @@ def test_response_file_search_filter_by_category(compat_client, text_model_id, v assert "revenue figures" not in result.text.lower() -def test_response_file_search_filter_by_date_range(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_date_range(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with date range filter using compound AND.""" tools = [ { @@ -222,7 +214,7 @@ def test_response_file_search_filter_by_date_range(compat_client, text_model_id, } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What happened in Q1 2023?", tools=tools, @@ -241,7 +233,7 @@ def test_response_file_search_filter_by_date_range(compat_client, text_model_id, assert "q3" not in result.text.lower() -def test_response_file_search_filter_compound_and(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_and(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with compound AND filter (region AND category).""" tools = [ { @@ -257,7 +249,7 @@ def test_response_file_search_filter_compound_and(compat_client, text_model_id, } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What are the engineering updates from the US?", tools=tools, @@ -277,7 +269,7 @@ def test_response_file_search_filter_compound_and(compat_client, text_model_id, assert "promotional" not in result.text.lower() and "revenue" not in result.text.lower() -def test_response_file_search_filter_compound_or(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_or(responses_client, text_model_id, 
vector_store_with_filtered_files): """Test file search with compound OR filter (marketing OR sales).""" tools = [ { @@ -293,7 +285,7 @@ def test_response_file_search_filter_compound_or(compat_client, text_model_id, v } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="Show me marketing and sales documents", tools=tools, @@ -320,7 +312,7 @@ def test_response_file_search_filter_compound_or(compat_client, text_model_id, v assert categories_found.issubset({"marketing", "sales"}), f"Found unexpected categories: {categories_found}" -def test_response_file_search_streaming_events(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_streaming_events(responses_client, text_model_id, vector_store_with_filtered_files): """Test that file search emits proper streaming events (in_progress, searching, completed).""" tools = [ { @@ -329,7 +321,7 @@ def test_response_file_search_streaming_events(compat_client, text_model_id, vec } ] - stream = compat_client.responses.create( + stream = responses_client.responses.create( model=text_model_id, input="What are the marketing updates?", tools=tools, diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 9bf58c6ff..2c7c7ef34 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -9,6 +9,7 @@ import logging # allow-direct-logging import os import httpx +import llama_stack_client import openai import pytest @@ -29,8 +30,8 @@ from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", web_search_test_cases) -def test_response_non_streaming_web_search(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_web_search(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -48,12 +49,9 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case): @pytest.mark.parametrize("case", file_search_test_cases) def test_response_non_streaming_file_search( - compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case + responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case ): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) if case.file_content: file_name = "test_response_non_streaming_file_search.txt" @@ -65,16 +63,16 @@ def test_response_non_streaming_file_search( else: raise ValueError("No file content or path provided for case") - file_response = upload_file(compat_client, file_name, file_path) + file_response = upload_file(responses_client, file_name, file_path) # Attach our file to the vector store - compat_client.vector_stores.files.create( + responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, ) # Wait for the file to be attached - wait_for_file_attachment(compat_client, vector_store.id, file_response.id) + wait_for_file_attachment(responses_client, vector_store.id, file_response.id) # Update our tools 
with the right vector store id tools = case.tools @@ -83,7 +81,7 @@ def test_response_non_streaming_file_search( tool["vector_store_ids"] = [vector_store.id] # Create the response request, which should query our vector store - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -105,15 +103,12 @@ def test_response_non_streaming_file_search( def test_response_non_streaming_file_search_empty_vector_store( - compat_client, text_model_id, embedding_model_id, embedding_dimension + responses_client, text_model_id, embedding_model_id, embedding_dimension ): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) # Create the response request, which should query our vector store - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="How many experts does the Llama 4 Maverick model have?", tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}], @@ -133,13 +128,10 @@ def test_response_non_streaming_file_search_empty_vector_store( def test_response_sequential_file_search( - compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path + responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path ): """Test file search with sequential responses using previous_response_id.""" - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) # Create a test file with content file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture." 
@@ -147,21 +139,21 @@ def test_response_sequential_file_search( file_path = tmp_path / file_name file_path.write_text(file_content) - file_response = upload_file(compat_client, file_name, file_path) + file_response = upload_file(responses_client, file_name, file_path) # Attach the file to the vector store - compat_client.vector_stores.files.create( + responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, ) # Wait for the file to be attached - wait_for_file_attachment(compat_client, vector_store.id, file_response.id) + wait_for_file_attachment(responses_client, vector_store.id, file_response.id) tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}] # First response request with file search - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="How many experts does the Llama 4 Maverick model have?", tools=tools, @@ -178,7 +170,7 @@ def test_response_sequential_file_search( assert "128" in response.output_text or "experts" in response.output_text.lower() # Second response request using previous_response_id - response2 = compat_client.responses.create( + response2 = responses_client.responses.create( model=text_model_id, input="Can you tell me more about the architecture?", tools=tools, @@ -199,14 +191,11 @@ def test_response_sequential_file_search( @pytest.mark.parametrize("case", mcp_tool_test_cases) -def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, caplog): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -243,15 +232,15 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap exc_type = ( AuthenticationRequiredError - if isinstance(compat_client, LlamaStackAsLibraryClient) - else (httpx.HTTPStatusError, openai.AuthenticationError) + if isinstance(responses_client, LlamaStackAsLibraryClient) + else (httpx.HTTPStatusError, openai.AuthenticationError, llama_stack_client.AuthenticationError) ) # Suppress expected auth error logs only for the failing auth attempt with caplog.at_level( logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming" ): with pytest.raises(exc_type): - compat_client.responses.create( + responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -262,7 +251,7 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap if tool["type"] == "mcp": tool["headers"] = {"Authorization": "Bearer test-token"} - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -272,14 +261,11 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap @pytest.mark.parametrize("case", mcp_tool_test_cases) -def test_response_sequential_mcp_tool(compat_client, text_model_id, case): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_sequential_mcp_tool(responses_client, 
text_model_id, case): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -311,7 +297,7 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case): text_content = message.content[0].text assert "boiling point" in text_content.lower() - response2 = compat_client.responses.create( + response2 = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id ) @@ -323,16 +309,13 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case): @pytest.mark.parametrize("case", mcp_tool_test_cases) @pytest.mark.parametrize("approve", [True, False]) -def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_mcp_tool_approval(responses_client, text_model_id, case, approve): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) for tool in tools: tool["require_approval"] = "always" - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -352,13 +335,13 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve) approval_request = response.output[1] assert approval_request.type == "mcp_approval_request" assert approval_request.name == "get_boiling_point" - assert json.loads(approval_request.arguments) == { - "liquid_name": "myawesomeliquid", - "celsius": True, - } + args = json.loads(approval_request.arguments) + assert args["liquid_name"] == "myawesomeliquid" + # celsius has a default value of True, so it may be omitted or explicitly set + assert args.get("celsius", True) is True # send approval response - response = compat_client.responses.create( + response = responses_client.responses.create( previous_response_id=response.id, model=text_model_id, input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}], @@ -398,8 +381,8 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve) @pytest.mark.parametrize("case", custom_tool_test_cases) -def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_custom_tool(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -412,8 +395,8 @@ def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): @pytest.mark.parametrize("case", custom_tool_test_cases) -def test_response_function_call_ordering_1(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_function_call_ordering_1(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -437,13 +420,13 @@ def test_response_function_call_ordering_1(compat_client, text_model_id, case): "call_id": response.output[0].call_id, } ) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=case.tools, 
stream=False, previous_response_id=response.id ) assert len(response.output) == 1 -def test_response_function_call_ordering_2(compat_client, text_model_id): +def test_response_function_call_ordering_2(responses_client, text_model_id): tools = [ { "type": "function", @@ -468,7 +451,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): "content": "Is the weather better in San Francisco or Los Angeles?", } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=tools, @@ -489,7 +472,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): "call_id": output.call_id, } ) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=tools, @@ -500,15 +483,12 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): @pytest.mark.parametrize("case", multi_turn_tool_execution_test_cases) -def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn_tool_execution(responses_client, text_model_id, case): """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( input=case.input, model=text_model_id, tools=tools, @@ -550,15 +530,12 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_mo @pytest.mark.parametrize("case", multi_turn_tool_execution_streaming_test_cases) -def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case): +def test_response_streaming_multi_turn_tool_execution(responses_client, text_model_id, case): """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - stream = compat_client.responses.create( + stream = responses_client.responses.create( input=case.input, model=text_model_id, tools=tools, diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py index 3a8fde37f..9ce0d1c98 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -10,8 +10,6 @@ import pytest from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.turn_events import StepCompleted, StepProgress, ToolCallIssuedDelta -from llama_stack.core.library_client import LlamaStackAsLibraryClient - AUTH_TOKEN = "test-token" from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server @@ -24,9 +22,6 @@ def mcp_server(): def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): - if not isinstance(llama_stack_client, LlamaStackAsLibraryClient): - pytest.skip("The local MCP server only reliably reachable from library client.") - test_toolgroup_id = MCP_TOOLGROUP_ID uri = mcp_server["server_url"] From aeaf4eb3dd1f465f3a17238ebfb47b76de9de4cd Mon Sep 17 
00:00:00 2001
From: Derek Higgins
Date: Thu, 13 Nov 2025 15:24:05 +0000
Subject: [PATCH 21/62] fix: remove_disabled_providers filtering models with
 None fields (#4132)

Fixed a bug where models with a None provider_model_id were incorrectly
filtered from the startup config display. The function was checking
multiple fields when it should only filter items with an explicitly
disabled provider_id.

Changes:
- Modified remove_disabled_providers to check only the provider_id field
- Changed the condition from checking multiple fields for None to checking
  only provider_id for "__disabled__", None, or an empty string
- Added comprehensive unit tests

Closes: #4131

Signed-off-by: Derek Higgins
---
 src/llama_stack/core/server/server.py |  4 +-
 tests/unit/server/test_server.py      | 69 ++++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 80505c3f9..5bf876c02 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -526,8 +526,8 @@ def extract_path_params(route: str) -> list[str]:
 def remove_disabled_providers(obj):
     if isinstance(obj, dict):
-        keys = ["provider_id", "shield_id", "provider_model_id", "model_id"]
-        if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys):
+        # Filter out items where provider_id is explicitly disabled or empty
+        if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
             return None
         return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
     elif isinstance(obj, list):
diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py
index d6d4f4f23..53f193672 100644
--- a/tests/unit/server/test_server.py
+++ b/tests/unit/server/test_server.py
@@ -12,7 +12,7 @@ from pydantic import ValidationError

 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.core.server.server import translate_exception
+from llama_stack.core.server.server import remove_disabled_providers, translate_exception


 class TestTranslateException:
@@ -194,3 +194,70 @@ class TestTranslateException:
         assert isinstance(result3, HTTPException)
         assert result3.status_code == 403
         assert result3.detail == "Permission denied: Access denied"
+
+
+class TestRemoveDisabledProviders:
+    """Test cases for the remove_disabled_providers function."""
+
+    def test_remove_explicitly_disabled_provider(self):
+        """Test that providers with provider_id='__disabled__' are removed."""
+        config = {
+            "providers": {
+                "inference": [
+                    {"provider_id": "openai", "provider_type": "remote::openai", "config": {}},
+                    {"provider_id": "__disabled__", "provider_type": "remote::vllm", "config": {}},
+                ]
+            }
+        }
+        result = remove_disabled_providers(config)
+        assert len(result["providers"]["inference"]) == 1
+        assert result["providers"]["inference"][0]["provider_id"] == "openai"
+
+    def test_remove_empty_provider_id(self):
+        """Test that providers with empty provider_id are removed."""
+        config = {
+            "providers": {
+                "inference": [
+                    {"provider_id": "openai", "provider_type": "remote::openai", "config": {}},
+                    {"provider_id": "", "provider_type": "remote::vllm", "config": {}},
+                ]
+            }
+        }
+        result = remove_disabled_providers(config)
+        assert len(result["providers"]["inference"]) == 1
+        assert result["providers"]["inference"][0]["provider_id"] == "openai"
+
+    def test_keep_models_with_none_provider_model_id(self):
+        """Test that models with None provider_model_id are NOT removed."""
+        config = {
+            "registered_resources": {
+                "models": [
+                    {
+                        "model_id": "llama-3-2-3b",
+                        "provider_id": "vllm-inference",
+                        "model_type": "llm",
+                        "provider_model_id": None,
+                        "metadata": {},
+                    },
+                    {
+                        "model_id": "gpt-4o-mini",
+                        "provider_id": "openai",
+                        "model_type": "llm",
+                        "provider_model_id": None,
+                        "metadata": {},
+                    },
+                    {
+                        "model_id": "granite-embedding-125m",
+                        "provider_id": "sentence-transformers",
+                        "model_type": "embedding",
+                        "provider_model_id": "ibm-granite/granite-embedding-125m-english",
+                        "metadata": {"embedding_dimension": 768},
+                    },
+                ]
+            }
+        }
+        result = remove_disabled_providers(config)
+        assert len(result["registered_resources"]["models"]) == 3
+        assert result["registered_resources"]["models"][0]["model_id"] == "llama-3-2-3b"
+        assert result["registered_resources"]["models"][1]["model_id"] == "gpt-4o-mini"
+        assert result["registered_resources"]["models"][2]["model_id"] == "granite-embedding-125m"

From 4442b24de7238364aa2201e6b36e8b0bd9f415cf Mon Sep 17 00:00:00 2001
From: Francisco Arceo
Date: Thu, 13 Nov 2025 12:15:32 -0500
Subject: [PATCH 22/62] chore: Fix docs so they can be deployed (#4149)

# What does this PR do?
Building/Deploying docs is failing here:
https://github.com/llamastack/llamastack.github.io/actions/runs/19333785864/job/55303209627#step:8:49

The build needs the playground file. Updated it to reflect the current
admin UI status.

## Test Plan

Signed-off-by: Francisco Javier Arceo
---
 .../docs/building_applications/playground.mdx | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 docs/docs/building_applications/playground.mdx

diff --git a/docs/docs/building_applications/playground.mdx b/docs/docs/building_applications/playground.mdx
new file mode 100644
index 000000000..1afb250c4
--- /dev/null
+++ b/docs/docs/building_applications/playground.mdx
@@ -0,0 +1,87 @@
+---
+title: Admin UI & Chat Playground
+description: Web-based admin interface and chat playground for Llama Stack
+sidebar_label: Playground
+sidebar_position: 10
+---
+
+# Admin UI & Chat Playground
+
+The Llama Stack UI provides a comprehensive web-based admin interface for managing your Llama Stack server, with an integrated chat playground for interactive testing. This admin interface is the primary way to monitor, manage, and debug your Llama Stack applications.
+
+## Quick Start
+
+Launch the admin UI with:
+
+```bash
+npx llama-stack-ui
+```
+
+Then visit `http://localhost:8322` to access the interface.
+
+## Admin Interface Features
+
+The Llama Stack UI is organized into two main sections:
+
+### 🎯 Create
+**Chat Playground** - Interactive testing environment
+- Real-time chat interface for testing agents and models
+- Multi-turn conversations with tool calling support
+- Agents SDK integration (will be migrated to Responses API)
+- Custom system prompts and model parameter adjustment
+
+### 📊 Manage
+**Logs & Resource Management** - Monitor and manage your stack
+- **Responses Logs**: View and analyze agent responses and interactions
+- **Chat Completions Logs**: Monitor chat completion requests and responses
+- **Vector Stores**: Create, manage, and monitor vector databases for RAG workflows
+- **Prompts**: Full CRUD operations for prompt templates and management
+- **Files**: Forthcoming file management capabilities
+
+## Key Capabilities for Application Development
+
+### Real-time Monitoring
+- **Response Tracking**: Monitor all agent responses and tool calls
+- **Completion Analysis**: View chat completion performance and patterns
+- **Vector Store Activity**: Track RAG operations and document processing
+- **Prompt Usage**: Analyze prompt template performance
+
+### Resource Management
+- **Vector Store CRUD**: Create, update, and delete vector databases
+- **Prompt Library**: Organize and version control your prompts
+- **File Operations**: Manage documents and assets (forthcoming)
+
+### Interactive Testing
+- **Chat Playground**: Test conversational flows before production deployment
+- **Agent Prototyping**: Validate agent behaviors and tool integrations
+
+## Development Workflow Integration
+
+The admin UI supports your development lifecycle:
+
+1. **Development**: Use the chat playground to prototype and test features
+2. **Monitoring**: Track system performance through logs and metrics
+3. **Management**: Organize prompts, vector stores, and other resources
+4. **Debugging**: Analyze logs to identify and resolve issues
+
+## Architecture Notes
+
+- **Current**: Chat playground uses the Agents SDK
+- **Future**: Migration to the Responses API for improved performance and consistency
+- **Admin Focus**: Primary emphasis on monitoring, logging, and resource management
+
+## Getting Started
+
+1. **Launch the UI**: Run `npx llama-stack-ui`
+2. **Explore Logs**: Start with the Responses and Chat Completions logs to understand your system activity
+3. **Test in Playground**: Use the chat interface to validate your agent configurations
+4. **Manage Resources**: Create vector stores and organize prompts through the UI
+
+For detailed setup and configuration, see the [Llama Stack UI documentation](/docs/distributions/llama_stack_ui).
+
+## Next Steps
+
+- Set up your [first agent](/docs/building_applications/agent)
+- Implement [RAG functionality](/docs/building_applications/rag)
+- Add [evaluation metrics](/docs/building_applications/evals)
+- Configure [safety measures](/docs/building_applications/safety)

From ceb716b9a0ed0904e53fd362ce7dce932c15e35c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Thu, 13 Nov 2025 19:52:38 +0100
Subject: [PATCH 23/62] chore: set minimum pre-commit version (#4148)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
- force a minimum pre-commit version
- pin to >= 4.4.0 when installing

---------

Signed-off-by: Sébastien Han
Co-authored-by: Ashwin Bharambe
---
 .github/workflows/pre-commit.yml | 2 +-
 .pre-commit-config.yaml          | 2 +-
 pyproject.toml                   | 2 +-
 uv.lock                          | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 74f7da19a..ac125bba5 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -53,7 +53,7 @@ jobs:
         working-directory: src/llama_stack_ui

       - name: Install pre-commit
-        run: python -m pip install pre-commit
+        run: python -m pip install 'pre-commit>=4.4.0'

       - name: Cache pre-commit
         uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 42cd2f5ce..19b83563c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
 exclude: 'build/'
-
+minimum_pre_commit_version: 4.4.0
 default_language_version:
     python: python3.12
     node: "22"
diff --git a/pyproject.toml b/pyproject.toml
index e6808af8a..d12d28e8c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,7 @@ dev = [
   "black",
   "ruff",
   "mypy",
-  "pre-commit",
+  "pre-commit>=4.4.0",
   "ruamel.yaml", # needed for openapi generator
 ]
 # Type checking dependencies - includes type stubs and optional runtime dependencies
diff --git a/uv.lock b/uv.lock
index f1808f005..884d41b79 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2125,7 +2125,7 @@ dev = [
     { name = "black" },
     { name = "mypy" },
     { name = "nbval" },
-    { name = "pre-commit" },
+    { name = "pre-commit", specifier = ">=4.4.0" },
     { name = "pytest", specifier = ">=8.4" },
     { name = "pytest-asyncio", specifier = ">=1.0" },
     { name = "pytest-cov" },
@@ -3403,7 +3403,7 @@ wheels = [

 [[package]]
 name = "pre-commit"
-version = "4.2.0"
+version = "4.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cfgv" },
     { name = "identify" },
     { name = "nodeenv" },
     { name = "pyyaml" },
     { name = "virtualenv" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a6/49/7845c2d7bf6474efd8e27905b51b11e6ce411708c91e829b93f324de9929/pre_commit-4.4.0.tar.gz", hash = "sha256:f0233ebab440e9f17cabbb558706eb173d19ace965c68cdce2c081042b4fab15", size = 197501, upload-time = "2025-11-08T21:12:11.607Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" },
+    { url = "https://files.pythonhosted.org/packages/27/11/574fe7d13acf30bfd0a8dd7fa1647040f2b8064f13f43e8c963b1e65093b/pre_commit-4.4.0-py2.py3-none-any.whl", hash = "sha256:b35ea52957cbf83dcc5d8ee636cbead8624e3a15fbfa61a370e42158ac8a5813", size = 226049, upload-time = "2025-11-08T21:12:10.228Z" },
 ]

 [[package]]

From 840ad75fe9bf62ab1cedaf5fbcd2690920ecfdaf Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Thu, 13 Nov 2025 14:51:17 -0500
Subject: [PATCH 24/62] feat: split API and provider specs into separate
 llama-stack-api pkg (#3895)

# What does this PR do?
Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server. see: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942 Motivation External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to: - Install only the type definitions they need without server dependencies - Avoid version conflicts with the installed llama-stack package - Be versioned and released independently This enables us to re-enable external provider module tests that were previously blocked by these import conflicts. Changes - Created llama-stack-api package with minimal dependencies (pydantic, jsonschema) - Moved APIs, providers datatypes, strong_typing, and schema_utils - Updated all imports from llama_stack.* to llama_stack_api.* - Configured local editable install for development workflow - Updated linting and type-checking configuration for both packages Next Steps - Publish llama-stack-api to PyPI - Update external provider dependencies - Re-enable external provider module tests Pre-cursor PRs to this one: - #4093 - #3954 - #4064 These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here. relates to #3237 ## Test Plan Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained. --------- Signed-off-by: Charlie Doern --- .github/workflows/python-build-test.yml | 13 +- .pre-commit-config.yaml | 2 +- docs/docs/concepts/apis/external.mdx | 9 +- docs/docs/distributions/building_distro.mdx | 2 +- .../external/external-providers-guide.mdx | 2 +- .../providers/vector_io/inline_sqlite-vec.mdx | 4 +- docs/openapi_generator/generate.py | 2 +- docs/openapi_generator/pyopenapi/generator.py | 26 +- .../openapi_generator/pyopenapi/operations.py | 12 +- .../pyopenapi/specification.py | 2 +- docs/openapi_generator/pyopenapi/utility.py | 7 +- pyproject.toml | 21 +- scripts/generate_prompt_format.py | 2 +- scripts/provider_codegen.py | 5 +- src/llama-stack-api/README.md | 103 +++ .../llama_stack_api/__init__.py | 871 ++++++++++++++++++ .../llama_stack_api}/agents.py | 6 +- .../llama_stack_api}/batches.py | 4 +- .../llama_stack_api}/benchmarks.py | 6 +- .../llama_stack_api/common}/__init__.py | 0 .../llama_stack_api}/common/content_types.py | 44 +- .../llama_stack_api}/common/errors.py | 0 .../llama_stack_api}/common/job_types.py | 2 +- .../llama_stack_api}/common/responses.py | 2 +- .../llama_stack_api}/common/tracing.py | 0 .../llama_stack_api}/common/training_types.py | 2 +- .../llama_stack_api}/common/type_system.py | 2 +- .../llama_stack_api}/conversations.py | 8 +- .../llama_stack_api}/datasetio.py | 8 +- .../llama_stack_api}/datasets.py | 6 +- .../llama_stack_api}/datatypes.py | 171 +++- .../llama_stack_api}/eval.py | 12 +- .../llama_stack_api}/files.py | 8 +- .../llama_stack_api}/inference.py | 12 +- .../llama_stack_api}/inspect.py | 6 +- .../llama_stack_api}/models.py | 8 +- .../llama_stack_api}/openai_responses.py | 4 +- .../llama_stack_api}/post_training.py | 10 +- .../llama_stack_api}/prompts.py | 6 +- .../llama_stack_api}/providers.py | 6 +- .../llama_stack_api}/py.typed | 0 .../llama_stack_api}/rag_tool.py | 2 +- .../llama_stack_api}/resource.py | 0 .../llama_stack_api}/safety.py | 10 +- .../llama_stack_api}/schema_utils.py | 0 
.../llama_stack_api}/scoring.py | 6 +- .../llama_stack_api}/scoring_functions.py | 8 +- .../llama_stack_api}/shields.py | 8 +- .../strong_typing/__init__.py | 0 .../strong_typing/auxiliary.py | 0 .../strong_typing/classdef.py | 0 .../llama_stack_api}/strong_typing/core.py | 0 .../strong_typing/deserializer.py | 0 .../strong_typing/docstring.py | 0 .../strong_typing/exception.py | 0 .../strong_typing/inspection.py | 0 .../llama_stack_api}/strong_typing/mapping.py | 0 .../llama_stack_api}/strong_typing/name.py | 0 .../llama_stack_api/strong_typing/py.typed | 0 .../llama_stack_api}/strong_typing/schema.py | 0 .../strong_typing/serialization.py | 0 .../strong_typing/serializer.py | 0 .../llama_stack_api}/strong_typing/slots.py | 0 .../strong_typing/topological.py | 0 .../llama_stack_api}/tools.py | 10 +- .../llama_stack_api}/vector_io.py | 12 +- .../llama_stack_api}/vector_stores.py | 2 +- .../llama_stack_api}/version.py | 0 src/llama-stack-api/pyproject.toml | 82 ++ src/llama_stack/apis/agents/__init__.py | 7 - src/llama_stack/apis/batches/__init__.py | 9 - src/llama_stack/apis/benchmarks/__init__.py | 7 - src/llama_stack/apis/common/__init__.py | 5 - .../apis/conversations/__init__.py | 27 - src/llama_stack/apis/datasetio/__init__.py | 7 - src/llama_stack/apis/datasets/__init__.py | 7 - src/llama_stack/apis/datatypes.py | 158 ---- src/llama_stack/apis/eval/__init__.py | 7 - src/llama_stack/apis/files/__init__.py | 7 - src/llama_stack/apis/inference/__init__.py | 7 - src/llama_stack/apis/inspect/__init__.py | 7 - src/llama_stack/apis/models/__init__.py | 7 - .../apis/post_training/__init__.py | 7 - src/llama_stack/apis/prompts/__init__.py | 9 - src/llama_stack/apis/providers/__init__.py | 7 - src/llama_stack/apis/safety/__init__.py | 7 - src/llama_stack/apis/scoring/__init__.py | 7 - .../apis/scoring_functions/__init__.py | 7 - src/llama_stack/apis/shields/__init__.py | 7 - src/llama_stack/apis/tools/__init__.py | 8 - src/llama_stack/apis/vector_io/__init__.py | 7 - .../apis/vector_stores/__init__.py | 7 - src/llama_stack/cli/stack/_list_deps.py | 2 +- src/llama_stack/cli/stack/utils.py | 2 +- src/llama_stack/core/build.py | 2 +- src/llama_stack/core/client.py | 3 +- src/llama_stack/core/configure.py | 3 +- .../core/conversations/conversations.py | 6 +- src/llama_stack/core/datatypes.py | 41 +- src/llama_stack/core/distribution.py | 12 +- src/llama_stack/core/external.py | 2 +- src/llama_stack/core/inspect.py | 8 +- src/llama_stack/core/library_client.py | 2 +- src/llama_stack/core/prompts/prompts.py | 2 +- src/llama_stack/core/providers.py | 3 +- src/llama_stack/core/resolver.py | 75 +- src/llama_stack/core/routers/__init__.py | 3 +- src/llama_stack/core/routers/datasets.py | 6 +- src/llama_stack/core/routers/eval_scoring.py | 10 +- src/llama_stack/core/routers/inference.py | 25 +- src/llama_stack/core/routers/safety.py | 7 +- src/llama_stack/core/routers/tool_runtime.py | 5 +- src/llama_stack/core/routers/vector_io.py | 12 +- .../core/routing_tables/benchmarks.py | 3 +- src/llama_stack/core/routing_tables/common.py | 6 +- .../core/routing_tables/datasets.py | 7 +- src/llama_stack/core/routing_tables/models.py | 12 +- .../core/routing_tables/scoring_functions.py | 7 +- .../core/routing_tables/shields.py | 4 +- .../core/routing_tables/toolgroups.py | 13 +- .../core/routing_tables/vector_stores.py | 11 +- src/llama_stack/core/server/auth_providers.py | 2 +- src/llama_stack/core/server/routes.py | 3 +- src/llama_stack/core/server/server.py | 4 +- src/llama_stack/core/stack.py | 45 +- 
src/llama_stack/core/telemetry/telemetry.py | 2 +- src/llama_stack/distributions/dell/dell.py | 3 +- .../meta-reference-gpu/meta_reference.py | 3 +- .../open-benchmark/open_benchmark.py | 4 +- .../distributions/starter/starter.py | 3 +- src/llama_stack/distributions/template.py | 3 +- .../inline/agents/meta_reference/agents.py | 21 +- .../responses/openai_responses.py | 31 +- .../meta_reference/responses/streaming.py | 23 +- .../meta_reference/responses/tool_executor.py | 30 +- .../agents/meta_reference/responses/types.py | 11 +- .../agents/meta_reference/responses/utils.py | 38 +- .../inline/agents/meta_reference/safety.py | 4 +- .../inline/batches/reference/__init__.py | 5 +- .../inline/batches/reference/batches.py | 20 +- .../inline/datasetio/localfs/datasetio.py | 6 +- .../inline/eval/meta_reference/eval.py | 25 +- .../providers/inline/files/localfs/files.py | 8 +- .../inline/inference/meta_reference/config.py | 2 +- .../inference/meta_reference/generators.py | 6 +- .../inference/meta_reference/inference.py | 21 +- .../sentence_transformers.py | 14 +- .../inline/post_training/common/validator.py | 7 +- .../huggingface/post_training.py | 7 +- .../recipes/finetune_single_device.py | 16 +- .../recipes/finetune_single_device_dpo.py | 14 +- .../inline/post_training/huggingface/utils.py | 3 +- .../post_training/torchtune/common/utils.py | 2 +- .../post_training/torchtune/post_training.py | 7 +- .../recipes/lora_finetuning_single_device.py | 22 +- .../safety/code_scanner/code_scanner.py | 10 +- .../inline/safety/llama_guard/llama_guard.py | 15 +- .../safety/prompt_guard/prompt_guard.py | 14 +- .../providers/inline/scoring/basic/scoring.py | 12 +- .../basic/scoring_fn/docvqa_scoring_fn.py | 4 +- .../basic/scoring_fn/equality_scoring_fn.py | 4 +- .../basic/scoring_fn/fn_defs/docvqa.py | 4 +- .../basic/scoring_fn/fn_defs/equality.py | 4 +- .../basic/scoring_fn/fn_defs/ifeval.py | 4 +- .../fn_defs/regex_parser_math_response.py | 4 +- .../regex_parser_multiple_choice_answer.py | 4 +- .../basic/scoring_fn/fn_defs/subset_of.py | 4 +- .../basic/scoring_fn/ifeval_scoring_fn.py | 4 +- .../regex_parser_math_response_scoring_fn.py | 4 +- .../scoring_fn/regex_parser_scoring_fn.py | 4 +- .../basic/scoring_fn/subset_of_scoring_fn.py | 4 +- .../inline/scoring/braintrust/braintrust.py | 15 +- .../scoring_fn/fn_defs/answer_correctness.py | 4 +- .../scoring_fn/fn_defs/answer_relevancy.py | 4 +- .../scoring_fn/fn_defs/answer_similarity.py | 4 +- .../fn_defs/context_entity_recall.py | 4 +- .../scoring_fn/fn_defs/context_precision.py | 4 +- .../scoring_fn/fn_defs/context_recall.py | 4 +- .../scoring_fn/fn_defs/context_relevancy.py | 4 +- .../scoring_fn/fn_defs/factuality.py | 4 +- .../scoring_fn/fn_defs/faithfulness.py | 4 +- .../inline/scoring/llm_as_judge/scoring.py | 14 +- .../fn_defs/llm_as_judge_405b_simpleqa.py | 4 +- .../scoring_fn/fn_defs/llm_as_judge_base.py | 3 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 5 +- .../inline/tool_runtime/rag/__init__.py | 2 +- .../tool_runtime/rag/context_retriever.py | 9 +- .../inline/tool_runtime/rag/memory.py | 21 +- .../inline/vector_io/chroma/__init__.py | 2 +- .../inline/vector_io/chroma/config.py | 2 +- .../inline/vector_io/faiss/__init__.py | 2 +- .../inline/vector_io/faiss/config.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 19 +- .../inline/vector_io/milvus/__init__.py | 2 +- .../inline/vector_io/milvus/config.py | 2 +- .../inline/vector_io/qdrant/__init__.py | 2 +- .../inline/vector_io/qdrant/config.py | 2 +- 
.../inline/vector_io/sqlite_vec/__init__.py | 2 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 16 +- src/llama_stack/providers/registry/agents.py | 3 +- src/llama_stack/providers/registry/batches.py | 2 +- .../providers/registry/datasetio.py | 2 +- src/llama_stack/providers/registry/eval.py | 2 +- src/llama_stack/providers/registry/files.py | 3 +- .../providers/registry/inference.py | 2 +- .../providers/registry/post_training.py | 2 +- src/llama_stack/providers/registry/safety.py | 2 +- src/llama_stack/providers/registry/scoring.py | 2 +- .../providers/registry/tool_runtime.py | 3 +- .../providers/registry/vector_io.py | 4 +- .../datasetio/huggingface/huggingface.py | 6 +- .../remote/datasetio/nvidia/datasetio.py | 6 +- .../providers/remote/eval/nvidia/eval.py | 24 +- .../providers/remote/files/openai/files.py | 8 +- .../providers/remote/files/s3/files.py | 7 +- .../remote/inference/anthropic/config.py | 2 +- .../remote/inference/azure/config.py | 2 +- .../remote/inference/bedrock/bedrock.py | 6 +- .../remote/inference/cerebras/cerebras.py | 3 +- .../remote/inference/cerebras/config.py | 2 +- .../remote/inference/databricks/config.py | 2 +- .../remote/inference/databricks/databricks.py | 2 +- .../remote/inference/fireworks/config.py | 2 +- .../remote/inference/gemini/config.py | 2 +- .../remote/inference/gemini/gemini.py | 3 +- .../providers/remote/inference/groq/config.py | 2 +- .../inference/llama_openai_compat/config.py | 2 +- .../inference/llama_openai_compat/llama.py | 3 +- .../remote/inference/nvidia/__init__.py | 2 +- .../remote/inference/nvidia/config.py | 2 +- .../remote/inference/nvidia/nvidia.py | 13 +- .../remote/inference/oci/__init__.py | 2 +- .../providers/remote/inference/oci/config.py | 2 +- .../providers/remote/inference/oci/oci.py | 10 +- .../remote/inference/ollama/ollama.py | 12 +- .../remote/inference/openai/config.py | 2 +- .../remote/inference/passthrough/config.py | 2 +- .../inference/passthrough/passthrough.py | 8 +- .../remote/inference/runpod/config.py | 2 +- .../remote/inference/runpod/runpod.py | 3 +- .../remote/inference/sambanova/config.py | 2 +- .../providers/remote/inference/tgi/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 6 +- .../remote/inference/together/config.py | 2 +- .../remote/inference/together/together.py | 12 +- .../remote/inference/vertexai/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 2 +- .../providers/remote/inference/vllm/vllm.py | 12 +- .../remote/inference/watsonx/config.py | 2 +- .../remote/inference/watsonx/watsonx.py | 11 +- .../remote/post_training/nvidia/README.md | 2 +- .../post_training/nvidia/post_training.py | 6 +- .../remote/post_training/nvidia/utils.py | 2 +- .../remote/safety/bedrock/bedrock.py | 9 +- .../providers/remote/safety/bedrock/config.py | 3 +- .../providers/remote/safety/nvidia/README.md | 4 +- .../providers/remote/safety/nvidia/config.py | 3 +- .../providers/remote/safety/nvidia/nvidia.py | 14 +- .../remote/safety/sambanova/config.py | 3 +- .../remote/safety/sambanova/sambanova.py | 10 +- .../tool_runtime/bing_search/bing_search.py | 8 +- .../tool_runtime/brave_search/brave_search.py | 8 +- .../model_context_protocol.py | 9 +- .../tavily_search/tavily_search.py | 8 +- .../wolfram_alpha/wolfram_alpha.py | 8 +- .../remote/vector_io/chroma/__init__.py | 2 +- .../remote/vector_io/chroma/chroma.py | 15 +- .../remote/vector_io/chroma/config.py | 2 +- .../remote/vector_io/milvus/__init__.py | 2 +- .../remote/vector_io/milvus/config.py | 2 +- .../remote/vector_io/milvus/milvus.py | 17 
+- .../remote/vector_io/pgvector/__init__.py | 2 +- .../remote/vector_io/pgvector/config.py | 2 +- .../remote/vector_io/pgvector/pgvector.py | 17 +- .../remote/vector_io/qdrant/__init__.py | 2 +- .../remote/vector_io/qdrant/config.py | 2 +- .../remote/vector_io/qdrant/qdrant.py | 25 +- .../remote/vector_io/weaviate/__init__.py | 2 +- .../remote/vector_io/weaviate/config.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 18 +- .../utils/common/data_schema_validator.py | 7 +- .../providers/utils/files/form_data.py | 3 +- .../utils/inference/embedding_mixin.py | 2 +- .../utils/inference/inference_store.py | 6 +- .../utils/inference/litellm_openai_mixin.py | 4 +- .../utils/inference/model_registry.py | 4 +- .../utils/inference/openai_compat.py | 26 +- .../providers/utils/inference/openai_mixin.py | 10 +- .../utils/inference/prompt_adapter.py | 12 +- .../providers/utils/kvstore/sqlite/config.py | 3 +- .../providers/utils/memory/file_utils.py | 2 +- .../utils/memory/openai_vector_store_mixin.py | 13 +- .../providers/utils/memory/vector_store.py | 20 +- src/llama_stack/providers/utils/pagination.py | 2 +- .../utils/responses/responses_store.py | 9 +- .../utils/scoring/aggregation_utils.py | 3 +- .../utils/scoring/base_scoring_fn.py | 4 +- .../providers/utils/sqlstore/api.py | 3 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- src/llama_stack/providers/utils/tools/mcp.py | 15 +- .../src/llama_stack_api_weather/weather.py | 4 +- tests/integration/batches/conftest.py | 3 +- tests/integration/files/test_files.py | 2 +- .../inference/test_provider_data_routing.py | 6 +- .../post_training/test_post_training.py | 4 +- ...c189daa31e88b25d0381a985f24203b7a5a38.json | 2 +- ...393e5712917253462292829b37b9320d6df82.json | 2 +- ...a0ee18d09bd413189a7c03b24bf3871e3d8d7.json | 2 +- tests/integration/safety/test_llama_guard.py | 2 +- tests/integration/safety/test_safety.py | 3 +- .../integration/safety/test_vision_safety.py | 3 +- .../tool_runtime/test_registration.py | 2 +- .../vector_io/test_openai_vector_stores.py | 19 +- tests/integration/vector_io/test_vector_io.py | 3 +- tests/unit/conversations/test_api_models.py | 6 +- .../unit/conversations/test_conversations.py | 5 +- tests/unit/core/routers/test_safety_router.py | 4 +- tests/unit/core/routers/test_vector_io.py | 2 +- tests/unit/core/test_stack_validation.py | 4 +- .../routers/test_routing_tables.py | 23 +- .../unit/distribution/test_api_recordings.py | 5 +- tests/unit/distribution/test_distribution.py | 22 +- tests/unit/files/test_files.py | 4 +- .../unit/providers/batches/test_reference.py | 4 +- .../batches/test_reference_idempotency.py | 3 +- tests/unit/providers/files/test_s3_files.py | 12 +- .../providers/files/test_s3_files_auth.py | 3 +- .../inference/test_bedrock_adapter.py | 2 +- .../providers/inference/test_remote_vllm.py | 8 +- .../responses/test_streaming.py | 2 +- tests/unit/providers/nvidia/test_datastore.py | 3 +- tests/unit/providers/nvidia/test_eval.py | 16 +- .../unit/providers/nvidia/test_parameters.py | 4 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 11 +- .../nvidia/test_supervised_fine_tuning.py | 4 +- tests/unit/providers/test_bedrock.py | 3 +- .../utils/inference/test_openai_mixin.py | 3 +- .../utils/inference/test_prompt_adapter.py | 6 +- .../utils/memory/test_vector_store.py | 3 +- .../providers/utils/test_model_registry.py | 2 +- tests/unit/providers/vector_io/conftest.py | 3 +- tests/unit/providers/vector_io/test_faiss.py | 5 +- 
.../providers/vector_io/test_sqlite_vec.py | 2 +- .../test_vector_io_openai_vector_stores.py | 12 +- .../providers/vector_io/test_vector_utils.py | 3 +- tests/unit/rag/test_rag_query.py | 7 +- tests/unit/rag/test_vector_store.py | 7 +- tests/unit/registry/test_registry.py | 6 +- tests/unit/registry/test_registry_acl.py | 3 +- tests/unit/server/test_access_control.py | 3 +- tests/unit/server/test_auth.py | 2 +- tests/unit/server/test_resolver.py | 3 +- tests/unit/server/test_sse.py | 2 +- tests/unit/tools/test_tools_json_schema.py | 2 +- .../utils/inference/test_inference_store.py | 4 +- .../utils/responses/test_responses_store.py | 9 +- uv.lock | 21 + 358 files changed, 2337 insertions(+), 1424 deletions(-) create mode 100644 src/llama-stack-api/README.md create mode 100644 src/llama-stack-api/llama_stack_api/__init__.py rename src/{llama_stack/apis/agents => llama-stack-api/llama_stack_api}/agents.py (96%) rename src/{llama_stack/apis/batches => llama-stack-api/llama_stack_api}/batches.py (96%) rename src/{llama_stack/apis/benchmarks => llama-stack-api/llama_stack_api}/benchmarks.py (94%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api/common}/__init__.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/content_types.py (65%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/errors.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/job_types.py (94%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/responses.py (97%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/tracing.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/training_types.py (96%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/type_system.py (97%) rename src/{llama_stack/apis/conversations => llama-stack-api/llama_stack_api}/conversations.py (97%) rename src/{llama_stack/apis/datasetio => llama-stack-api/llama_stack_api}/datasetio.py (89%) rename src/{llama_stack/apis/datasets => llama-stack-api/llama_stack_api}/datasets.py (97%) rename src/{llama_stack/providers => llama-stack-api/llama_stack_api}/datatypes.py (51%) rename src/{llama_stack/apis/eval => llama-stack-api/llama_stack_api}/eval.py (92%) rename src/{llama_stack/apis/files => llama-stack-api/llama_stack_api}/files.py (96%) rename src/{llama_stack/apis/inference => llama-stack-api/llama_stack_api}/inference.py (99%) rename src/{llama_stack/apis/inspect => llama-stack-api/llama_stack_api}/inspect.py (94%) rename src/{llama_stack/apis/models => llama-stack-api/llama_stack_api}/models.py (95%) rename src/{llama_stack/apis/agents => llama-stack-api/llama_stack_api}/openai_responses.py (99%) rename src/{llama_stack/apis/post_training => llama-stack-api/llama_stack_api}/post_training.py (97%) rename src/{llama_stack/apis/prompts => llama-stack-api/llama_stack_api}/prompts.py (97%) rename src/{llama_stack/apis/providers => llama-stack-api/llama_stack_api}/providers.py (91%) rename src/{llama_stack/strong_typing => llama-stack-api/llama_stack_api}/py.typed (100%) rename src/{llama_stack/apis/tools => llama-stack-api/llama_stack_api}/rag_tool.py (98%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/resource.py (100%) rename src/{llama_stack/apis/safety => llama-stack-api/llama_stack_api}/safety.py (93%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/schema_utils.py (100%) rename src/{llama_stack/apis/scoring => 
llama-stack-api/llama_stack_api}/scoring.py (93%) rename src/{llama_stack/apis/scoring_functions => llama-stack-api/llama_stack_api}/scoring_functions.py (96%) rename src/{llama_stack/apis/shields => llama-stack-api/llama_stack_api}/shields.py (91%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/__init__.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/auxiliary.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/classdef.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/core.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/deserializer.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/docstring.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/exception.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/inspection.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/mapping.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/name.py (100%) create mode 100644 src/llama-stack-api/llama_stack_api/strong_typing/py.typed rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/schema.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/serialization.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/serializer.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/slots.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/topological.py (100%) rename src/{llama_stack/apis/tools => llama-stack-api/llama_stack_api}/tools.py (95%) rename src/{llama_stack/apis/vector_io => llama-stack-api/llama_stack_api}/vector_io.py (98%) rename src/{llama_stack/apis/vector_stores => llama-stack-api/llama_stack_api}/vector_stores.py (96%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/version.py (100%) create mode 100644 src/llama-stack-api/pyproject.toml delete mode 100644 src/llama_stack/apis/agents/__init__.py delete mode 100644 src/llama_stack/apis/batches/__init__.py delete mode 100644 src/llama_stack/apis/benchmarks/__init__.py delete mode 100644 src/llama_stack/apis/common/__init__.py delete mode 100644 src/llama_stack/apis/conversations/__init__.py delete mode 100644 src/llama_stack/apis/datasetio/__init__.py delete mode 100644 src/llama_stack/apis/datasets/__init__.py delete mode 100644 src/llama_stack/apis/datatypes.py delete mode 100644 src/llama_stack/apis/eval/__init__.py delete mode 100644 src/llama_stack/apis/files/__init__.py delete mode 100644 src/llama_stack/apis/inference/__init__.py delete mode 100644 src/llama_stack/apis/inspect/__init__.py delete mode 100644 src/llama_stack/apis/models/__init__.py delete mode 100644 src/llama_stack/apis/post_training/__init__.py delete mode 100644 src/llama_stack/apis/prompts/__init__.py delete mode 100644 src/llama_stack/apis/providers/__init__.py delete mode 100644 src/llama_stack/apis/safety/__init__.py delete mode 100644 src/llama_stack/apis/scoring/__init__.py delete mode 100644 src/llama_stack/apis/scoring_functions/__init__.py delete mode 100644 src/llama_stack/apis/shields/__init__.py delete mode 100644 src/llama_stack/apis/tools/__init__.py delete mode 100644 src/llama_stack/apis/vector_io/__init__.py delete mode 100644 src/llama_stack/apis/vector_stores/__init__.py diff 
--git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index c605a30c3..b0f2c6e69 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -30,13 +30,16 @@ jobs:
           activate-environment: true
           version: 0.7.6

-      - name: Build Llama Stack package
-        run: |
-          uv build
+      - name: Build Llama Stack API package
+        working-directory: src/llama-stack-api
+        run: uv build

-      - name: Install Llama Stack package
+      - name: Build Llama Stack package
+        run: uv build
+
+      - name: Install Llama Stack package (with api stubs from local build)
         run: |
-          uv pip install dist/*.whl
+          uv pip install --find-links src/llama-stack-api/dist dist/*.whl

       - name: Verify Llama Stack package
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 19b83563c..6f4dd6a0e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: ruff
         args: [ --fix ]
-        exclude: ^src/llama_stack/strong_typing/.*$
+        exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
       - id: ruff-format

 - repo: https://github.com/adamchainz/blacken-docs
diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx
index 42819a4ac..005b85647 100644
--- a/docs/docs/concepts/apis/external.mdx
+++ b/docs/docs/concepts/apis/external.mdx
@@ -58,7 +58,7 @@ External APIs must expose an `available_providers()` function in their module tha
 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec

 def available_providers() -> list[ProviderSpec]:
@@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol

-from llama_stack.schema_utils import webmethod
+from llama_stack_api import webmethod

 class WeatherAPI(Protocol):
@@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol

-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     ProviderSpec,
     RemoteProviderSpec,
+    webmethod,
 )
-from llama_stack.schema_utils import webmethod
-

 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx
index c4a01bf7d..532ffaaf0 100644
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api import ProviderSpec

 def get_provider_spec() -> ProviderSpec:
diff --git a/docs/docs/providers/external/external-providers-guide.mdx b/docs/docs/providers/external/external-providers-guide.mdx
index 748fd62c0..dc813c75b 100644
--- a/docs/docs/providers/external/external-providers-guide.mdx
+++ b/docs/docs/providers/external/external-providers-guide.mdx
@@ -80,7 +80,7 @@ container_image: custom-vector-store:latest # optional
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class.
 The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:

 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     ProviderSpec,
     Api,
     RemoteProviderSpec,
diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
index bfa2f29de..45631dff3 100644
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@@ -153,7 +153,7 @@ description: |
   Example using RAGQueryConfig with different search modes:

   ```python
-  from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

   # Vector search
   config = RAGQueryConfig(mode="vector", max_chunks=5)
@@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:

 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 65720df4a..769db32a7 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml

-from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
+from llama_stack_api import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack # noqa: E402

 from .pyopenapi.options import Options # noqa: E402
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 30fc9038d..afbb5c710 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

 from fastapi import UploadFile

-from llama_stack.apis.datatypes import Error
-from llama_stack.strong_typing.core import JsonType
-from llama_stack.strong_typing.docstring import Docstring, parse_type
-from llama_stack.strong_typing.inspection import (
+from llama_stack_api import (
+    Docstring,
+    Error,
+    JsonSchemaGenerator,
+    JsonType,
+    Schema,
+    SchemaOptions,
+    get_schema_identifier,
     is_generic_list,
     is_type_optional,
     is_type_union,
     is_unwrapped_body_param,
+    json_dump_string,
+    object_to_json,
+    parse_type,
+    python_type_to_name,
+    register_schema,
     unwrap_generic_list,
     unwrap_optional_type,
     unwrap_union_types,
 )
-from llama_stack.strong_typing.name import python_type_to_name
-from llama_stack.strong_typing.schema import (
-    get_schema_identifier,
-    JsonSchemaGenerator,
-    register_schema,
-    Schema,
-    SchemaOptions,
-)
-from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel

 from .operations import (
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index a1c95c7a7..42a554f2c 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -11,19 +11,21 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
-
 from termcolor
import colored -from llama_stack.strong_typing.inspection import get_signature - from typing import get_origin, get_args from fastapi import UploadFile from fastapi.params import File, Form from typing import Annotated -from llama_stack.schema_utils import ExtraBodyField +from llama_stack_api import ( + ExtraBodyField, + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, + get_signature, +) def split_prefix( diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 90bf54316..bfa35f539 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -9,7 +9,7 @@ import enum from dataclasses import dataclass from typing import Any, ClassVar, Dict, List, Optional, Union -from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType +from llama_stack_api import JsonType, Schema, StrictJsonType URL = str diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index c1425b250..762249eb8 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -11,8 +11,7 @@ from pathlib import Path from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args from pydantic import BaseModel -from llama_stack.strong_typing.schema import object_to_json, StrictJsonType -from llama_stack.strong_typing.inspection import is_unwrapped_body_param +from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json from llama_stack.core.resolver import api_protocol_map from .generator import Generator @@ -165,12 +164,12 @@ def _validate_api_delete_method_returns_none(method) -> str | None: return "has no return type annotation" return_type = hints['return'] - + # Allow OpenAI endpoints to return response objects since they follow OpenAI specification method_name = getattr(method, '__name__', '') if method_name.__contains__('openai_'): return None - + if return_type is not None and return_type is not type(None): return "does not return None where None is mandatory" diff --git a/pyproject.toml b/pyproject.toml index d12d28e8c..d287b4be7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "httpx", "jinja2>=3.1.6", "jsonschema", + "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) "openai>=2.5.0", "prompt-toolkit", "python-dotenv", @@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*"] +include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"] [[tool.uv.index]] name = "pytorch-cpu" @@ -190,6 +191,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] +llama-stack-api = [{ path = "src/llama-stack-api", editable = true }] [tool.ruff] line-length = 120 @@ -256,8 +258,8 @@ unfixable = [ ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["src"] -packages = ["llama_stack"] +mypy_path = ["src", "src/llama-stack-api"] +packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] warn_return_any = true @@ -279,15 +281,18 @@ exclude = [ "^src/llama_stack/core/store/registry\\.py$", "^src/llama_stack/core/utils/exec\\.py$", 
"^src/llama_stack/core/utils/prompt_for_config\\.py$", + # Moved to llama-stack-api but still excluded "^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", - "^src/llama_stack/providers/inline/datasetio/localfs/", - "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", - "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/", + "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$", + "^src/llama_stack/providers/inline/agents/meta_reference/", + "^src/llama_stack/providers/inline/datasetio/localfs/", + "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", + "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", "^src/llama_stack/providers/inline/post_training/common/validator\\.py$", "^src/llama_stack/providers/inline/safety/code_scanner/", @@ -337,7 +342,9 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama_stack/strong_typing/auxiliary\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$", + "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 855033f95..8099a3f0d 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,8 +14,8 @@ import os from pathlib import Path import fire +from llama_stack_api import ModelNotFoundError -from llama_stack.apis.common.errors import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index de79b4d17..d62d626ad 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None: """Extract docstring from the API protocol class.""" try: # Import the API module dynamically - api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()]) + api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()]) # Get the main protocol class (usually capitalized API name) protocol_class_name = api_name.title() @@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: # this string replace is ridiculous field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") - field_type = field_type.replace("llama_stack.apis.inference.inference.", "") + field_type = field_type.replace("llama_stack_api.inference.", "") field_type = field_type.replace("llama_stack.providers.", "") + field_type = field_type.replace("llama_stack_api.datatypes.", "") default_value = field.default if 
field.default_factory is not None:
diff --git a/src/llama-stack-api/README.md b/src/llama-stack-api/README.md
new file mode 100644
index 000000000..aa6b05722
--- /dev/null
+++ b/src/llama-stack-api/README.md
@@ -0,0 +1,103 @@
+# llama-stack-api
+
+API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
+
+## Overview
+
+`llama-stack-api` is a minimal-dependency package that contains:
+
+- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
+- **Provider Specifications**: Provider spec definitions for building custom providers
+- **Data Types**: Shared data types and models used across the Llama Stack ecosystem
+- **Type Utilities**: Strong typing utilities and schema validation
+
+## What This Package Does NOT Include
+
+- Server implementation (see `llama-stack` package)
+- Provider implementations (see `llama-stack` package)
+- CLI tools (see `llama-stack` package)
+- Runtime orchestration (see `llama-stack` package)
+
+## Use Cases
+
+This package is designed for:
+
+1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
+2. **Client Library Authors**: Use type definitions without server dependencies
+3. **Documentation Generation**: Generate API docs from protocol definitions
+4. **Type Checking**: Validate implementations against the official specs
+
+## Installation
+
+```bash
+pip install llama-stack-api
+```
+
+Or with uv:
+
+```bash
+uv pip install llama-stack-api
+```
+
+## Dependencies
+
+Minimal dependencies:
+- `pydantic>=2.11.9` - For data validation and serialization
+- `jsonschema` - For JSON schema utilities
+
+## Versioning
+
+This package follows semantic versioning independently from the main `llama-stack` package:
+
+- **Patch versions** (0.1.x): Documentation, internal improvements
+- **Minor versions** (0.x.0): New APIs, backward-compatible changes
+- **Major versions** (x.0.0): Breaking changes to existing APIs
+
+Current version: **0.4.0**
+
+## Usage Example
+
+All public symbols are exported from the top-level `llama_stack_api` package:
+
+```python
+from llama_stack_api import (
+    Api,
+    Inference,
+    InlineProviderSpec,
+    OpenAIChatCompletionRequestWithExtraBody,
+    ProviderSpec,
+)
+
+
+# Use protocol definitions for type checking
+class MyInferenceProvider(Inference):
+    async def openai_chat_completion(
+        self, params: OpenAIChatCompletionRequestWithExtraBody
+    ):
+        # Your implementation
+        pass
+
+
+# Define provider specifications
+my_provider_spec = InlineProviderSpec(
+    api=Api.inference,
+    provider_type="inline::my-provider",
+    pip_packages=["my-dependencies"],
+    module="my_package.providers.inference",
+    config_class="my_package.providers.inference.MyConfig",
+)
+```
+
+## Relationship to llama-stack
+
+The main `llama-stack` package depends on `llama-stack-api` and provides:
+- Full server implementation
+- Built-in provider implementations
+- CLI tools for running and managing stacks
+- Runtime provider resolution and orchestration
+
+## Contributing
+
+See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
+
+## License
+
+MIT License - see LICENSE file for details.
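+
+## Registering External APIs
+
+`Api` is a dynamic enum (see `DynamicApiMeta` in `llama_stack_api.datatypes`): names that are
+not built in must be registered with `Api.add()` before `Api(...)` will resolve them. A minimal
+sketch, assuming `"weather"` is the identifier of your external API:
+
+```python
+from llama_stack_api import Api
+
+# Api("weather") raises ValueError until the name has been registered.
+weather_api = Api.add("weather")
+
+# After registration, lookups return the dynamically created member.
+assert Api("weather") is weather_api
+```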
+
+## Links
+
+- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack)
+- [Documentation](https://llamastack.ai/)
+- [Client Library](https://pypi.org/project/llama-stack-client/)
diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama-stack-api/llama_stack_api/__init__.py
new file mode 100644
index 000000000..8bbe9f8bd
--- /dev/null
+++ b/src/llama-stack-api/llama_stack_api/__init__.py
@@ -0,0 +1,871 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Llama Stack API Specifications
+
+This package contains the API definitions, data types, and protocol specifications
+for Llama Stack. It is designed to be a lightweight dependency for external providers
+and clients that need to interact with Llama Stack APIs without requiring the full
+server implementation.
+
+All imports from this package MUST use the form:
+    from llama_stack_api import <symbol>
+
+Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported
+and are considered a code smell. All exported symbols are explicitly listed in __all__.
+"""
+
+__version__ = "0.4.0"
+
+# Import submodules for those who need them
+from . import common, strong_typing  # noqa: F401
+
+# Import all public API symbols
+from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec
+from .batches import Batches, BatchObject, ListBatchesResponse
+from .benchmarks import (
+    Benchmark,
+    BenchmarkInput,
+    Benchmarks,
+    CommonBenchmarkFields,
+    ListBenchmarksResponse,
+)
+
+# Import commonly used types from common submodule
+from .common.content_types import (
+    URL,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    TextContentItem,
+    _URLOrData,
+)
+from .common.errors import (
+    ConflictError,
+    DatasetNotFoundError,
+    InvalidConversationIdError,
+    ModelNotFoundError,
+    ModelTypeError,
+    ResourceNotFoundError,
+    TokenValidationError,
+    ToolGroupNotFoundError,
+    UnsupportedModelError,
+    VectorStoreNotFoundError,
+)
+from .common.job_types import Job, JobStatus
+from .common.responses import Order, PaginatedResponse
+from .common.training_types import Checkpoint, PostTrainingMetric
+from .common.type_system import (
+    ChatCompletionInputType,
+    CompletionInputType,
+    NumberType,
+    ParamType,
+    StringType,
+)
+from .conversations import (
+    Conversation,
+    ConversationDeletedResource,
+    ConversationItem,
+    ConversationItemCreateRequest,
+    ConversationItemDeletedResource,
+    ConversationItemInclude,
+    ConversationItemList,
+    ConversationMessage,
+    Conversations,
+    Metadata,
+)
+from .datasetio import DatasetIO, DatasetStore
+from .datasets import (
+    CommonDatasetFields,
+    Dataset,
+    DatasetInput,
+    DatasetPurpose,
+    Datasets,
+    DatasetType,
+    DataSource,
+    ListDatasetsResponse,
+    RowsDataSource,
+    URIDataSource,
+)
+from .datatypes import (
+    Api,
+    BenchmarksProtocolPrivate,
+    DatasetsProtocolPrivate,
+    DynamicApiMeta,
+    Error,
+    ExternalApiSpec,
+    HealthResponse,
+    HealthStatus,
+    InlineProviderSpec,
+    ModelsProtocolPrivate,
+    ProviderSpec,
+    RemoteProviderConfig,
+    RemoteProviderSpec,
+    RoutingTable,
+    ScoringFunctionsProtocolPrivate,
+    ShieldsProtocolPrivate,
+    ToolGroupsProtocolPrivate,
+    VectorStoresProtocolPrivate,
+)
+from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
+from .files import (
+    ExpiresAfter,
+    Files,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+
OpenAIFileObject, + OpenAIFilePurpose, +) +from .inference import ( + Bf16QuantizationConfig, + ChatCompletionResponseEventType, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Fp8QuantizationConfig, + GrammarResponseFormat, + GreedySamplingStrategy, + Inference, + InferenceProvider, + Int4QuantizationConfig, + JsonSchemaResponseFormat, + ListOpenAIChatCompletionResponse, + LogProbConfig, + ModelStore, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionMessageContent, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionTextOnlyMessageContent, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChatCompletionUsage, + OpenAIChatCompletionUsageCompletionTokensDetails, + OpenAIChatCompletionUsagePromptTokensDetails, + OpenAIChoice, + OpenAIChoiceDelta, + OpenAIChoiceLogprobs, + OpenAIChunkChoice, + OpenAICompletion, + OpenAICompletionChoice, + OpenAICompletionLogprobs, + OpenAICompletionRequestWithExtraBody, + OpenAICompletionWithInputMessages, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIFile, + OpenAIFileFile, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAITokenLogProb, + OpenAIToolMessageParam, + OpenAITopLogProb, + OpenAIUserMessageParam, + QuantizationConfig, + QuantizationType, + RerankData, + RerankResponse, + ResponseFormat, + ResponseFormatType, + SamplingParams, + SamplingStrategy, + SystemMessage, + SystemMessageBehavior, + TextTruncation, + TokenLogProbs, + ToolChoice, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, +) +from .inspect import ( + ApiFilter, + HealthInfo, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) +from .models import ( + CommonModelFields, + ListModelsResponse, + Model, + ModelInput, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) +from .openai_responses import ( + AllowedToolsFilter, + ApprovalFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + MCPListToolsTool, + OpenAIDeleteResponseObject, + OpenAIResponseAnnotationCitation, + OpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseAnnotationFileCitation, + OpenAIResponseAnnotationFilePath, + OpenAIResponseAnnotations, + OpenAIResponseContentPart, + OpenAIResponseContentPartOutputText, + OpenAIResponseContentPartReasoningSummary, + OpenAIResponseContentPartReasoningText, + OpenAIResponseContentPartRefusal, + OpenAIResponseError, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMCPApprovalRequest, + OpenAIResponseMCPApprovalResponse, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + 
OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseFailed, + OpenAIResponseObjectStreamResponseFileSearchCallCompleted, + OpenAIResponseObjectStreamResponseFileSearchCallInProgress, + OpenAIResponseObjectStreamResponseFileSearchCallSearching, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseIncomplete, + OpenAIResponseObjectStreamResponseInProgress, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsFailed, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseObjectStreamResponseOutputTextDone, + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded, + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone, + OpenAIResponseObjectStreamResponseReasoningTextDelta, + OpenAIResponseObjectStreamResponseReasoningTextDone, + OpenAIResponseObjectStreamResponseRefusalDelta, + OpenAIResponseObjectStreamResponseRefusalDone, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseObjectWithInput, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAIResponseTool, + OpenAIResponseToolMCP, + OpenAIResponseUsage, + OpenAIResponseUsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails, + WebSearchToolTypes, +) +from .post_training import ( + AlgorithmConfig, + DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, + EfficiencyConfig, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + PostTraining, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobLogStream, + PostTrainingJobStatusResponse, + PostTrainingRLHFRequest, + QATFinetuningConfig, + RLHFAlgorithm, + TrainingConfig, +) +from .prompts import ListPromptsResponse, Prompt, Prompts +from .providers import ListProvidersResponse, ProviderInfo, Providers +from .rag_tool import ( + DefaultRAGQueryGeneratorConfig, + LLMRAGQueryGeneratorConfig, + RAGDocument, + RAGQueryConfig, + RAGQueryGenerator, + RAGQueryGeneratorConfig, + RAGQueryResult, + RAGSearchMode, + Ranker, + RRFRanker, + WeightedRanker, +) +from .resource import Resource, ResourceType +from .safety import ( + ModerationObject, + ModerationObjectResults, + RunShieldResponse, 
+ Safety, + SafetyViolation, + ShieldStore, + ViolationLevel, +) +from .schema_utils import ( + CallableT, + ExtraBodyField, + WebMethod, + json_schema_type, + register_schema, + webmethod, +) +from .scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFunctionStore, + ScoringResult, + ScoringResultRow, +) +from .scoring_functions import ( + AggregationFunctionType, + BasicScoringFnParams, + CommonScoringFnFields, + ListScoringFunctionsResponse, + LLMAsJudgeScoringFnParams, + RegexParserScoringFnParams, + ScoringFn, + ScoringFnInput, + ScoringFnParams, + ScoringFnParamsType, + ScoringFunctions, +) +from .shields import ( + CommonShieldFields, + ListShieldsResponse, + Shield, + ShieldInput, + Shields, +) + +# Import from strong_typing +from .strong_typing.core import JsonType +from .strong_typing.docstring import Docstring, parse_type +from .strong_typing.inspection import ( + get_signature, + is_generic_list, + is_type_optional, + is_type_union, + is_unwrapped_body_param, + unwrap_generic_list, + unwrap_optional_type, + unwrap_union_types, +) +from .strong_typing.name import python_type_to_name +from .strong_typing.schema import ( + JsonSchemaGenerator, + Schema, + SchemaOptions, + StrictJsonType, + get_schema_identifier, +) +from .strong_typing.serialization import json_dump_string, object_to_json +from .tools import ( + ListToolDefsResponse, + ListToolGroupsResponse, + SpecialToolGroup, + ToolDef, + ToolGroup, + ToolGroupInput, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + ToolStore, +) +from .vector_io import ( + Chunk, + ChunkMetadata, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, + QueryChunksResponse, + SearchRankingOptions, + VectorIO, + VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyAuto, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, + VectorStoreContent, + VectorStoreCreateRequest, + VectorStoreDeleteResponse, + VectorStoreFileBatchObject, + VectorStoreFileContentResponse, + VectorStoreFileCounts, + VectorStoreFileDeleteResponse, + VectorStoreFileLastError, + VectorStoreFileObject, + VectorStoreFilesListInBatchResponse, + VectorStoreFileStatus, + VectorStoreListFilesResponse, + VectorStoreListResponse, + VectorStoreModifyRequest, + VectorStoreObject, + VectorStoreSearchRequest, + VectorStoreSearchResponse, + VectorStoreSearchResponsePage, + VectorStoreTable, +) +from .vector_stores import VectorStore, VectorStoreInput +from .version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + +__all__ = [ + # Submodules + "common", + "strong_typing", + # Version constants + "LLAMA_STACK_API_V1", + "LLAMA_STACK_API_V1ALPHA", + "LLAMA_STACK_API_V1BETA", + # API Symbols + "Agents", + "AggregationFunctionType", + "AlgorithmConfig", + "AllowedToolsFilter", + "Api", + "ApiFilter", + "ApprovalFilter", + "BasicScoringFnParams", + "Batches", + "BatchObject", + "Benchmark", + "BenchmarkConfig", + "BenchmarkInput", + "Benchmarks", + "BenchmarksProtocolPrivate", + "Bf16QuantizationConfig", + "CallableT", + "ChatCompletionInputType", + "ChatCompletionResponseEventType", + "Checkpoint", + "Chunk", + "ChunkMetadata", + "CommonBenchmarkFields", + "ConflictError", + "CommonDatasetFields", + "CommonModelFields", + "CommonScoringFnFields", + "CommonShieldFields", + "CompletionInputType", + "CompletionRequest", + "Conversation", + "ConversationDeletedResource", + "ConversationItem", + "ConversationItemCreateRequest", + 
"ConversationItemDeletedResource", + "ConversationItemInclude", + "ConversationItemList", + "ConversationMessage", + "Conversations", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DataSource", + "Dataset", + "DatasetFormat", + "DatasetIO", + "DatasetInput", + "DatasetPurpose", + "DatasetNotFoundError", + "DatasetStore", + "DatasetType", + "Datasets", + "DatasetsProtocolPrivate", + "DefaultRAGQueryGeneratorConfig", + "Docstring", + "DynamicApiMeta", + "EfficiencyConfig", + "EmbeddingTaskType", + "EmbeddingsResponse", + "Error", + "Eval", + "EvalCandidate", + "EvaluateResponse", + "ExpiresAfter", + "ExternalApiSpec", + "ExtraBodyField", + "Files", + "Fp8QuantizationConfig", + "get_schema_identifier", + "get_signature", + "GrammarResponseFormat", + "GreedySamplingStrategy", + "HealthInfo", + "HealthResponse", + "HealthStatus", + "ImageContentItem", + "Inference", + "InferenceProvider", + "InlineProviderSpec", + "Inspect", + "Int4QuantizationConfig", + "InterleavedContent", + "InterleavedContentItem", + "InvalidConversationIdError", + "is_generic_list", + "is_type_optional", + "is_type_union", + "is_unwrapped_body_param", + "Job", + "JobStatus", + "json_dump_string", + "json_schema_type", + "JsonSchemaGenerator", + "JsonSchemaResponseFormat", + "JsonType", + "LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", + "ListBatchesResponse", + "ListBenchmarksResponse", + "ListDatasetsResponse", + "ListModelsResponse", + "ListOpenAIChatCompletionResponse", + "ListOpenAIFileResponse", + "ListOpenAIResponseInputItem", + "ListOpenAIResponseObject", + "ListPostTrainingJobsResponse", + "ListPromptsResponse", + "ListProvidersResponse", + "ListRoutesResponse", + "ListScoringFunctionsResponse", + "ListShieldsResponse", + "ListToolDefsResponse", + "ListToolGroupsResponse", + "LogProbConfig", + "LoraFinetuningConfig", + "MCPListToolsTool", + "Metadata", + "Model", + "ModelCandidate", + "ModelInput", + "ModelNotFoundError", + "ModelStore", + "ModelType", + "ModelTypeError", + "Models", + "ModelsProtocolPrivate", + "ModerationObject", + "ModerationObjectResults", + "NumberType", + "object_to_json", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionMessageContent", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletionTextOnlyMessageContent", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChatCompletionUsageCompletionTokensDetails", + "OpenAIChatCompletionUsagePromptTokensDetails", + "OpenAIChoice", + "OpenAIChoiceDelta", + "OpenAIChoiceLogprobs", + "OpenAIChunkChoice", + "OpenAICompletion", + "OpenAICompletionChoice", + "OpenAICompletionLogprobs", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletionWithInputMessages", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenAIDeleteResponseObject", + "OpenAIDeveloperMessageParam", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingsResponse", + "OpenAIFile", + "OpenAIFileDeleteResponse", + "OpenAIFileFile", + "OpenAIFileObject", + "OpenAIFilePurpose", + "OpenAIImageURL", + "OpenAIJSONSchema", + "OpenAIListModelsResponse", + "OpenAIMessageParam", + "OpenAIModel", + "Order", + "OpenAIResponseAnnotationCitation", + 
"OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPart", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseError", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIResponseInput", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolMCP", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseObject", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFileSearchToolCallResults", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + 
"OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTextFormat", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "OpenAIResponseUsageInputTokensDetails", + "OpenAIResponseUsageOutputTokensDetails", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OptimizerConfig", + "OptimizerType", + "PaginatedResponse", + "ParamType", + "parse_type", + "PostTraining", + "PostTrainingMetric", + "PostTrainingJob", + "PostTrainingJobArtifactsResponse", + "PostTrainingJobLogStream", + "PostTrainingJobStatusResponse", + "PostTrainingRLHFRequest", + "Prompt", + "Prompts", + "ProviderInfo", + "ProviderSpec", + "Providers", + "python_type_to_name", + "QATFinetuningConfig", + "QuantizationConfig", + "QuantizationType", + "QueryChunksResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGenerator", + "RAGQueryGeneratorConfig", + "RAGQueryResult", + "RAGSearchMode", + "register_schema", + "RLHFAlgorithm", + "RRFRanker", + "Ranker", + "RegexParserScoringFnParams", + "RemoteProviderConfig", + "RemoteProviderSpec", + "RerankData", + "RerankResponse", + "Resource", + "ResourceNotFoundError", + "ResourceType", + "ResponseFormat", + "ResponseFormatType", + "ResponseGuardrail", + "ResponseGuardrailSpec", + "RouteInfo", + "RoutingTable", + "RowsDataSource", + "RunShieldResponse", + "Safety", + "SafetyViolation", + "SamplingParams", + "SamplingStrategy", + "ScoreBatchResponse", + "ScoreResponse", + "Scoring", + "ScoringFn", + "ScoringFnInput", + "ScoringFnParams", + "ScoringFnParamsType", + "ScoringFunctionStore", + "ScoringFunctions", + "ScoringFunctionsProtocolPrivate", + "ScoringResult", + "ScoringResultRow", + "Schema", + "SchemaOptions", + "SearchRankingOptions", + "Shield", + "ShieldInput", + "ShieldStore", + "Shields", + "ShieldsProtocolPrivate", + "SpecialToolGroup", + "StrictJsonType", + "StringType", + "SystemMessage", + "SystemMessageBehavior", + "TextContentItem", + "TextTruncation", + "TokenLogProbs", + "TokenValidationError", + "ToolChoice", + "ToolGroupNotFoundError", + "ToolDef", + "ToolGroup", + "ToolGroupInput", + "ToolGroups", + "ToolGroupsProtocolPrivate", + "ToolInvocationResult", + "ToolResponseMessage", + "ToolRuntime", + "ToolStore", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "TrainingConfig", + "UnsupportedModelError", + "unwrap_generic_list", + "unwrap_optional_type", + "unwrap_union_types", + "URIDataSource", + "URL", + "_URLOrData", + "UserMessage", + "VectorIO", + "VectorStore", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "VectorStoreContent", + "VectorStoreCreateRequest", + "VectorStoreDeleteResponse", + "VectorStoreFileBatchObject", + "VectorStoreFileContentResponse", + "VectorStoreFileCounts", + "VectorStoreFileDeleteResponse", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFileStatus", + "VectorStoreFilesListInBatchResponse", + "VectorStoreInput", + "VectorStoreListFilesResponse", + "VectorStoreListResponse", + "VectorStoreModifyRequest", + "VectorStoreObject", + "VectorStoreSearchRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VectorStoreTable", + "VectorStoreNotFoundError", + "VectorStoresProtocolPrivate", + "VersionInfo", + "ViolationLevel", + "webmethod", + "WebMethod", + 
"WebSearchToolTypes", + "WeightedRanker", +] diff --git a/src/llama_stack/apis/agents/agents.py b/src/llama-stack-api/llama_stack_api/agents.py similarity index 96% rename from src/llama_stack/apis/agents/agents.py rename to src/llama-stack-api/llama_stack_api/agents.py index 09687ef33..ca0611746 100644 --- a/src/llama_stack/apis/agents/agents.py +++ b/src/llama-stack-api/llama_stack_api/agents.py @@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.responses import Order -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.common.responses import Order +from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 from .openai_responses import ( ListOpenAIResponseInputItem, diff --git a/src/llama_stack/apis/batches/batches.py b/src/llama-stack-api/llama_stack_api/batches.py similarity index 96% rename from src/llama_stack/apis/batches/batches.py rename to src/llama-stack-api/llama_stack_api/batches.py index 1ee9fdb15..00c47d39f 100644 --- a/src/llama_stack/apis/batches/batches.py +++ b/src/llama-stack-api/llama_stack_api/batches.py @@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 try: from openai.types import Batch as BatchObject diff --git a/src/llama_stack/apis/benchmarks/benchmarks.py b/src/llama-stack-api/llama_stack_api/benchmarks.py similarity index 94% rename from src/llama_stack/apis/benchmarks/benchmarks.py rename to src/llama-stack-api/llama_stack_api/benchmarks.py index 9a67269c3..e9ac3a8b8 100644 --- a/src/llama_stack/apis/benchmarks/benchmarks.py +++ b/src/llama-stack-api/llama_stack_api/benchmarks.py @@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA class CommonBenchmarkFields(BaseModel): diff --git a/src/llama_stack/apis/__init__.py b/src/llama-stack-api/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama_stack/apis/__init__.py rename to src/llama-stack-api/llama_stack_api/common/__init__.py diff --git a/src/llama_stack/apis/common/content_types.py b/src/llama-stack-api/llama_stack_api/common/content_types.py similarity index 65% rename from src/llama_stack/apis/common/content_types.py rename to src/llama-stack-api/llama_stack_api/common/content_types.py index 950dd17ff..1bfe109c1 100644 --- a/src/llama_stack/apis/common/content_types.py +++ b/src/llama-stack-api/llama_stack_api/common/content_types.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from enum import Enum from typing import Annotated, Literal from pydantic import BaseModel, Field, model_validator -from llama_stack.models.llama.datatypes import ToolCall -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type @@ -101,43 +99,3 @@ class ImageDelta(BaseModel): type: Literal["image"] = "image" image: bytes - - -class ToolCallParseStatus(Enum): - """Status of tool call parsing during streaming. - :cvar started: Tool call parsing has begun - :cvar in_progress: Tool call parsing is ongoing - :cvar failed: Tool call parsing failed - :cvar succeeded: Tool call parsing completed successfully - """ - - started = "started" - in_progress = "in_progress" - failed = "failed" - succeeded = "succeeded" - - -@json_schema_type -class ToolCallDelta(BaseModel): - """A tool call content delta for streaming responses. - - :param type: Discriminator type of the delta. Always "tool_call" - :param tool_call: Either an in-progress tool call string or the final parsed tool call - :param parse_status: Current parsing status of the tool call - """ - - type: Literal["tool_call"] = "tool_call" - - # you either send an in-progress tool call so the client can stream a long - # code generation or you send the final parsed tool call at the end of the - # stream - tool_call: str | ToolCall - parse_status: ToolCallParseStatus - - -# streaming completions send a stream of ContentDeltas -ContentDelta = Annotated[ - TextDelta | ImageDelta | ToolCallDelta, - Field(discriminator="type"), -] -register_schema(ContentDelta, name="ContentDelta") diff --git a/src/llama_stack/apis/common/errors.py b/src/llama-stack-api/llama_stack_api/common/errors.py similarity index 100% rename from src/llama_stack/apis/common/errors.py rename to src/llama-stack-api/llama_stack_api/common/errors.py diff --git a/src/llama_stack/apis/common/job_types.py b/src/llama-stack-api/llama_stack_api/common/job_types.py similarity index 94% rename from src/llama_stack/apis/common/job_types.py rename to src/llama-stack-api/llama_stack_api/common/job_types.py index 5da42bfd3..b6ef35d7f 100644 --- a/src/llama_stack/apis/common/job_types.py +++ b/src/llama-stack-api/llama_stack_api/common/job_types.py @@ -7,7 +7,7 @@ from enum import Enum from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class JobStatus(Enum): diff --git a/src/llama_stack/apis/common/responses.py b/src/llama-stack-api/llama_stack_api/common/responses.py similarity index 97% rename from src/llama_stack/apis/common/responses.py rename to src/llama-stack-api/llama_stack_api/common/responses.py index 53a290eea..c843ce1d9 100644 --- a/src/llama_stack/apis/common/responses.py +++ b/src/llama-stack-api/llama_stack_api/common/responses.py @@ -9,7 +9,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class Order(Enum): diff --git a/src/llama_stack/apis/common/tracing.py b/src/llama-stack-api/llama_stack_api/common/tracing.py similarity index 100% rename from src/llama_stack/apis/common/tracing.py rename to src/llama-stack-api/llama_stack_api/common/tracing.py diff --git a/src/llama_stack/apis/common/training_types.py b/src/llama-stack-api/llama_stack_api/common/training_types.py similarity index 96% rename from src/llama_stack/apis/common/training_types.py rename to 
src/llama-stack-api/llama_stack_api/common/training_types.py index 5c236a25d..aa3481770 100644 --- a/src/llama_stack/apis/common/training_types.py +++ b/src/llama-stack-api/llama_stack_api/common/training_types.py @@ -8,7 +8,7 @@ from datetime import datetime from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/apis/common/type_system.py b/src/llama-stack-api/llama_stack_api/common/type_system.py similarity index 97% rename from src/llama_stack/apis/common/type_system.py rename to src/llama-stack-api/llama_stack_api/common/type_system.py index c71501548..8297713cf 100644 --- a/src/llama_stack/apis/common/type_system.py +++ b/src/llama-stack-api/llama_stack_api/common/type_system.py @@ -8,7 +8,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type diff --git a/src/llama_stack/apis/conversations/conversations.py b/src/llama-stack-api/llama_stack_api/conversations.py similarity index 97% rename from src/llama_stack/apis/conversations/conversations.py rename to src/llama-stack-api/llama_stack_api/conversations.py index 3fdd3b47e..4854181d1 100644 --- a/src/llama_stack/apis/conversations/conversations.py +++ b/src/llama-stack-api/llama_stack_api/conversations.py @@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse, @@ -20,9 +21,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageWebSearchToolCall, ) -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 Metadata = dict[str, str] diff --git a/src/llama_stack/apis/datasetio/datasetio.py b/src/llama-stack-api/llama_stack_api/datasetio.py similarity index 89% rename from src/llama_stack/apis/datasetio/datasetio.py rename to src/llama-stack-api/llama_stack_api/datasetio.py index a0c4a1afc..309a8ff41 100644 --- a/src/llama_stack/apis/datasetio/datasetio.py +++ b/src/llama-stack-api/llama_stack_api/datasetio.py @@ -6,10 +6,10 @@ from typing import Any, Protocol, runtime_checkable -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.version import LLAMA_STACK_API_V1BETA -from llama_stack.schema_utils import webmethod +from llama_stack_api.common.responses import PaginatedResponse +from llama_stack_api.datasets import Dataset +from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetStore(Protocol): diff --git a/src/llama_stack/apis/datasets/datasets.py b/src/llama-stack-api/llama_stack_api/datasets.py similarity index 97% rename from src/llama_stack/apis/datasets/datasets.py 
rename to src/llama-stack-api/llama_stack_api/datasets.py
index 9bedc6209..76d787078 100644
--- a/src/llama_stack/apis/datasets/datasets.py
+++ b/src/llama-stack-api/llama_stack_api/datasets.py
@@ -9,9 +9,9 @@
 from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA
 class DatasetPurpose(StrEnum):
diff --git a/src/llama_stack/providers/datatypes.py b/src/llama-stack-api/llama_stack_api/datatypes.py
similarity index 51%
rename from src/llama_stack/providers/datatypes.py
rename to src/llama-stack-api/llama_stack_api/datatypes.py
index 9be3edb8e..f024068f3 100644
--- a/src/llama_stack/providers/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/datatypes.py
@@ -4,21 +4,172 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from enum import StrEnum
+from enum import Enum, EnumMeta, StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse
 from pydantic import BaseModel, Field
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model
-from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.tools import ToolGroup
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api.scoring_functions import ScoringFn
+from llama_stack_api.shields import Shield
+from llama_stack_api.tools import ToolGroup
+from llama_stack_api.vector_stores import VectorStore
+
+
+class DynamicApiMeta(EnumMeta):
+    def __new__(cls, name, bases, namespace):
+        # Store the original enum values
+        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
+
+        # Create the enum class
+        cls = super().__new__(cls, name, bases, namespace)
+
+        # Store the original values for reference
+        cls._original_values = original_values
+        # Initialize _dynamic_values
+        cls._dynamic_values = {}
+
+        return cls
+
+    def __call__(cls, value):
+        try:
+            return super().__call__(value)
+        except ValueError as e:
+            # If this value was already dynamically added, return it
+            if value in cls._dynamic_values:
+                return cls._dynamic_values[value]
+
+            # If the value doesn't exist, create a new enum member
+            # Create a new member name from the value
+            member_name = value.lower().replace("-", "_")
+
+            # If this member name already exists in the enum, return the existing member
+            if member_name in cls._member_map_:
+                return cls._member_map_[member_name]
+
+            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
+            # register new APIs explicitly
+            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
+
+    def __iter__(cls):
+        # Allow iteration over both static and dynamic members
+        yield from super().__iter__()
+        if hasattr(cls, "_dynamic_values"):
+            yield from cls._dynamic_values.values()
+
+    def add(cls, value):
+        """
+        Add a new API to the enum.
+        Used to register external APIs.
+        """
+        member_name = value.lower().replace("-", "_")
+
+        # If this member name already exists in the enum, return it
+        if member_name in cls._member_map_:
+            return cls._member_map_[member_name]
+
+        # Create a new enum member
+        member = object.__new__(cls)
+        member._name_ = member_name
+        member._value_ = value
+
+        # Add it to the enum class
+        cls._member_map_[member_name] = member
+        cls._member_names_.append(member_name)
+        cls._member_type_ = str
+
+        # Store it in our dynamic values
+        cls._dynamic_values[value] = member
+
+        return member
+
+
+@json_schema_type
+class Api(Enum, metaclass=DynamicApiMeta):
+    """Enumeration of all available APIs in the Llama Stack system.
+    :cvar providers: Provider management and configuration
+    :cvar inference: Text generation, chat completions, and embeddings
+    :cvar safety: Content moderation and safety shields
+    :cvar agents: Agent orchestration and execution
+    :cvar batches: Batch processing for asynchronous API requests
+    :cvar vector_io: Vector database operations and queries
+    :cvar datasetio: Dataset input/output operations
+    :cvar scoring: Model output evaluation and scoring
+    :cvar eval: Model evaluation and benchmarking framework
+    :cvar post_training: Fine-tuning and model training
+    :cvar tool_runtime: Tool execution and management
+    :cvar telemetry: Observability and system monitoring
+    :cvar models: Model metadata and management
+    :cvar shields: Safety shield implementations
+    :cvar datasets: Dataset creation and management
+    :cvar scoring_functions: Scoring function definitions
+    :cvar benchmarks: Benchmark suite management
+    :cvar tool_groups: Tool group organization
+    :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
+    :cvar inspect: Built-in system inspection and introspection
+    """
+
+    providers = "providers"
+    inference = "inference"
+    safety = "safety"
+    agents = "agents"
+    batches = "batches"
+    vector_io = "vector_io"
+    datasetio = "datasetio"
+    scoring = "scoring"
+    eval = "eval"
+    post_training = "post_training"
+    tool_runtime = "tool_runtime"
+
+    models = "models"
+    shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
+    datasets = "datasets"
+    scoring_functions = "scoring_functions"
+    benchmarks = "benchmarks"
+    tool_groups = "tool_groups"
+    files = "files"
+    prompts = "prompts"
+    conversations = "conversations"
+
+    # built-in API
+    inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+
+    :param status: HTTP status code
+    :param title: Error title, a short summary of the error which is invariant for an error type
+    :param detail: Error detail, a longer human-readable description of the error
+    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+    """
+
+    status: int
+    title: str
+    detail: str
+    instance: str | None = None
+
+
+class ExternalApiSpec(BaseModel):
+    """Specification for an external API implementation."""
+
+    module: str = Field(..., description="Python module containing the API implementation")
+    name: str = Field(..., description="Name of the API")
+    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
+    protocol: str = Field(..., description="Name of the protocol class for the API")
+
+
+# Provider-related types (merged from providers/datatypes.py)
+# NOTE: These imports are forward references to avoid circular dependencies
+# They will be resolved at runtime when the classes are used
 class ModelsProtocolPrivate(Protocol):
diff --git a/src/llama_stack/apis/eval/eval.py b/src/llama-stack-api/llama_stack_api/eval.py
similarity index 92%
rename from src/llama_stack/apis/eval/eval.py
rename to src/llama-stack-api/llama_stack_api/eval.py
index accb04ce1..7a11c221e 100644
--- a/src/llama_stack/apis/eval/eval.py
+++ b/src/llama-stack-api/llama_stack_api/eval.py
@@ -8,12 +8,12 @@
 from typing import Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.job_types import Job
-from llama_stack.apis.inference import SamplingParams, SystemMessage
-from llama_stack.apis.scoring import ScoringResult
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.job_types import Job
+from llama_stack_api.inference import SamplingParams, SystemMessage
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.scoring import ScoringResult
+from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 @json_schema_type
diff --git a/src/llama_stack/apis/files/files.py b/src/llama-stack-api/llama_stack_api/files.py
similarity index 96%
rename from src/llama_stack/apis/files/files.py
rename to src/llama-stack-api/llama_stack_api/files.py
index f0ea2f892..8a75a1c39 100644
--- a/src/llama_stack/apis/files/files.py
+++ b/src/llama-stack-api/llama_stack_api/files.py
@@ -10,10 +10,10 @@
 from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.responses import Order
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 # OpenAI Files API Models
diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama-stack-api/llama_stack_api/inference.py
similarity index 99%
rename from src/llama_stack/apis/inference/inference.py
rename to src/llama-stack-api/llama_stack_api/inference.py
index 9f04917c9..b42de95be 100644
--- a/src/llama_stack/apis/inference/inference.py
+++ b/src/llama-stack-api/llama_stack_api/inference.py
@@ -18,14 +18,14 @@
 from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.responses import (
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.common.responses import (
     Order,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.models import Model
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 @json_schema_type
diff --git a/src/llama_stack/apis/inspect/inspect.py b/src/llama-stack-api/llama_stack_api/inspect.py
similarity index 94%
rename from src/llama_stack/apis/inspect/inspect.py
rename to src/llama-stack-api/llama_stack_api/inspect.py
index 235abb124..8326e9e6b 100644
--- a/src/llama_stack/apis/inspect/inspect.py
+++ b/src/llama-stack-api/llama_stack_api/inspect.py
@@ -8,11 +8,11 @@
 from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.version import (
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import (
     LLAMA_STACK_API_V1,
 )
-from llama_stack.providers.datatypes import HealthStatus
-from llama_stack.schema_utils import json_schema_type, webmethod
 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
diff --git a/src/llama_stack/apis/models/models.py b/src/llama-stack-api/llama_stack_api/models.py
similarity index 95%
rename from src/llama_stack/apis/models/models.py
rename to src/llama-stack-api/llama_stack_api/models.py
index bbb359b51..833864ec2 100644
--- a/src/llama_stack/apis/models/models.py
+++ b/src/llama-stack-api/llama_stack_api/models.py
@@ -9,10 +9,10 @@
 from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, ConfigDict, Field, field_validator
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 class CommonModelFields(BaseModel):
diff --git a/src/llama_stack/apis/agents/openai_responses.py b/src/llama-stack-api/llama_stack_api/openai_responses.py
similarity index 99%
rename from src/llama_stack/apis/agents/openai_responses.py
rename to src/llama-stack-api/llama_stack_api/openai_responses.py
index 16657ab32..70139a98a 100644
--- a/src/llama_stack/apis/agents/openai_responses.py
+++ b/src/llama-stack-api/llama_stack_api/openai_responses.py
@@ -10,8 +10,8 @@
 from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
-from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions
 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
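The `DynamicApiMeta` metaclass in the `datatypes.py` hunk above deliberately refuses to auto-create enum members: looking up an unknown value raises `ValueError`, and external APIs must be registered explicitly through `Api.add()`, which normalizes the member name by lowercasing and replacing `-` with `_`. A minimal sketch of that contract, assuming the `llama_stack_api` package introduced by this patch is installed and using a hypothetical `my-custom-api` name:

```python
from llama_stack_api import Api

# Static members behave like any ordinary Enum lookup.
assert Api("inference") is Api.inference

# Unknown values are rejected instead of being silently created.
try:
    Api("my-custom-api")  # hypothetical external API, not yet registered
except ValueError as err:
    print(err)  # API 'my-custom-api' does not exist. Use Api.add() to register new APIs.

# Api.add() registers the external API and returns the new member;
# later lookups by value resolve to the same dynamically added member.
member = Api.add("my-custom-api")
assert Api("my-custom-api") is member
assert member.value == "my-custom-api"
assert member.name == "my_custom_api"
```

Raising instead of auto-registering means a typo in a provider config cannot mint a phantom API; only an explicit `Api.add()` call (for example, when loading an `ExternalApiSpec`) can extend the enum.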
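More broadly, nearly every hunk in this patch is the same mechanical rewrite: types that previously lived under `llama_stack.apis.*` (plus `llama_stack.providers.datatypes` and `llama_stack.schema_utils`) now come from the standalone `llama_stack_api` package, either from its top-level namespace or from a `common.*` submodule. A hedged sketch of the before/after shape for downstream imports, using only symbols that appear elsewhere in this patch:

```python
# Before: API types were scattered across llama_stack subpackages.
# from llama_stack.apis.models import Model
# from llama_stack.apis.datasets import Dataset
# from llama_stack.apis.common.job_types import JobStatus
# from llama_stack.providers.datatypes import Api, ProviderSpec
# from llama_stack.schema_utils import json_schema_type, webmethod

# After: one lightweight package. Most names are re-exported at the top
# level, while shared helpers keep their common.* submodule paths.
from llama_stack_api import Api, Dataset, Model, ProviderSpec
from llama_stack_api.common.job_types import JobStatus
from llama_stack_api.schema_utils import json_schema_type, webmethod
```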
diff --git a/src/llama_stack/apis/post_training/post_training.py b/src/llama-stack-api/llama_stack_api/post_training.py similarity index 97% rename from src/llama_stack/apis/post_training/post_training.py rename to src/llama-stack-api/llama_stack_api/post_training.py index 2b7a6222f..0cc9277d9 100644 --- a/src/llama_stack/apis/post_training/post_training.py +++ b/src/llama-stack-api/llama_stack_api/post_training.py @@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.job_types import JobStatus -from llama_stack.apis.common.training_types import Checkpoint -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.content_types import URL +from llama_stack_api.common.job_types import JobStatus +from llama_stack_api.common.training_types import Checkpoint +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/prompts/prompts.py b/src/llama-stack-api/llama_stack_api/prompts.py similarity index 97% rename from src/llama_stack/apis/prompts/prompts.py rename to src/llama-stack-api/llama_stack_api/prompts.py index 406ae529c..651d03e61 100644 --- a/src/llama_stack/apis/prompts/prompts.py +++ b/src/llama-stack-api/llama_stack_api/prompts.py @@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/providers/providers.py b/src/llama-stack-api/llama_stack_api/providers.py similarity index 91% rename from src/llama_stack/apis/providers/providers.py rename to src/llama-stack-api/llama_stack_api/providers.py index e1872571d..5b555b82f 100644 --- a/src/llama_stack/apis/providers/providers.py +++ b/src/llama-stack-api/llama_stack_api/providers.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import HealthResponse -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.datatypes import HealthResponse +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/py.typed similarity index 100% rename from src/llama_stack/strong_typing/py.typed rename to src/llama-stack-api/llama_stack_api/py.typed diff --git a/src/llama_stack/apis/tools/rag_tool.py b/src/llama-stack-api/llama_stack_api/rag_tool.py similarity index 98% rename from src/llama_stack/apis/tools/rag_tool.py rename to src/llama-stack-api/llama_stack_api/rag_tool.py index 8bcc89bf0..b5edd51af 100644 --- a/src/llama_stack/apis/tools/rag_tool.py +++ b/src/llama-stack-api/llama_stack_api/rag_tool.py @@ -9,7 +9,7 
@@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator -from llama_stack.apis.common.content_types import URL, InterleavedContent +from llama_stack_api.common.content_types import URL, InterleavedContent class RRFRanker(BaseModel): diff --git a/src/llama_stack/apis/resource.py b/src/llama-stack-api/llama_stack_api/resource.py similarity index 100% rename from src/llama_stack/apis/resource.py rename to src/llama-stack-api/llama_stack_api/resource.py diff --git a/src/llama_stack/apis/safety/safety.py b/src/llama-stack-api/llama_stack_api/safety.py similarity index 93% rename from src/llama_stack/apis/safety/safety.py rename to src/llama-stack-api/llama_stack_api/safety.py index 8872cc518..ef84be2ea 100644 --- a/src/llama_stack/apis/safety/safety.py +++ b/src/llama-stack-api/llama_stack_api/safety.py @@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.shields import Shield -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.shields import Shield +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/schema_utils.py b/src/llama-stack-api/llama_stack_api/schema_utils.py similarity index 100% rename from src/llama_stack/schema_utils.py rename to src/llama-stack-api/llama_stack_api/schema_utils.py diff --git a/src/llama_stack/apis/scoring/scoring.py b/src/llama-stack-api/llama_stack_api/scoring.py similarity index 93% rename from src/llama_stack/apis/scoring/scoring.py rename to src/llama-stack-api/llama_stack_api/scoring.py index 03d943e94..47d144d21 100644 --- a/src/llama_stack/apis/scoring/scoring.py +++ b/src/llama-stack-api/llama_stack_api/scoring.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1 # mapping of metric to value ScoringResultRow = dict[str, Any] diff --git a/src/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama-stack-api/llama_stack_api/scoring_functions.py similarity index 96% rename from src/llama_stack/apis/scoring_functions/scoring_functions.py rename to src/llama-stack-api/llama_stack_api/scoring_functions.py index 78f4a7541..f75336e54 100644 --- a/src/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/src/llama-stack-api/llama_stack_api/scoring_functions.py @@ -16,10 +16,10 @@ from typing import ( from pydantic import BaseModel, Field -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.type_system 
import ParamType +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # Perhaps more structure can be imposed on these functions. Maybe they could be associated diff --git a/src/llama_stack/apis/shields/shields.py b/src/llama-stack-api/llama_stack_api/shields.py similarity index 91% rename from src/llama_stack/apis/shields/shields.py rename to src/llama-stack-api/llama_stack_api/shields.py index 659ba8b75..2aeb83333 100644 --- a/src/llama_stack/apis/shields/shields.py +++ b/src/llama-stack-api/llama_stack_api/shields.py @@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonShieldFields(BaseModel): diff --git a/src/llama_stack/strong_typing/__init__.py b/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py similarity index 100% rename from src/llama_stack/strong_typing/__init__.py rename to src/llama-stack-api/llama_stack_api/strong_typing/__init__.py diff --git a/src/llama_stack/strong_typing/auxiliary.py b/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py similarity index 100% rename from src/llama_stack/strong_typing/auxiliary.py rename to src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py diff --git a/src/llama_stack/strong_typing/classdef.py b/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py similarity index 100% rename from src/llama_stack/strong_typing/classdef.py rename to src/llama-stack-api/llama_stack_api/strong_typing/classdef.py diff --git a/src/llama_stack/strong_typing/core.py b/src/llama-stack-api/llama_stack_api/strong_typing/core.py similarity index 100% rename from src/llama_stack/strong_typing/core.py rename to src/llama-stack-api/llama_stack_api/strong_typing/core.py diff --git a/src/llama_stack/strong_typing/deserializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py similarity index 100% rename from src/llama_stack/strong_typing/deserializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py diff --git a/src/llama_stack/strong_typing/docstring.py b/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py similarity index 100% rename from src/llama_stack/strong_typing/docstring.py rename to src/llama-stack-api/llama_stack_api/strong_typing/docstring.py diff --git a/src/llama_stack/strong_typing/exception.py b/src/llama-stack-api/llama_stack_api/strong_typing/exception.py similarity index 100% rename from src/llama_stack/strong_typing/exception.py rename to src/llama-stack-api/llama_stack_api/strong_typing/exception.py diff --git a/src/llama_stack/strong_typing/inspection.py b/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py similarity index 100% rename from src/llama_stack/strong_typing/inspection.py rename to src/llama-stack-api/llama_stack_api/strong_typing/inspection.py diff --git a/src/llama_stack/strong_typing/mapping.py 
b/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py similarity index 100% rename from src/llama_stack/strong_typing/mapping.py rename to src/llama-stack-api/llama_stack_api/strong_typing/mapping.py diff --git a/src/llama_stack/strong_typing/name.py b/src/llama-stack-api/llama_stack_api/strong_typing/name.py similarity index 100% rename from src/llama_stack/strong_typing/name.py rename to src/llama-stack-api/llama_stack_api/strong_typing/name.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/strong_typing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/llama_stack/strong_typing/schema.py b/src/llama-stack-api/llama_stack_api/strong_typing/schema.py similarity index 100% rename from src/llama_stack/strong_typing/schema.py rename to src/llama-stack-api/llama_stack_api/strong_typing/schema.py diff --git a/src/llama_stack/strong_typing/serialization.py b/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py similarity index 100% rename from src/llama_stack/strong_typing/serialization.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serialization.py diff --git a/src/llama_stack/strong_typing/serializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py similarity index 100% rename from src/llama_stack/strong_typing/serializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serializer.py diff --git a/src/llama_stack/strong_typing/slots.py b/src/llama-stack-api/llama_stack_api/strong_typing/slots.py similarity index 100% rename from src/llama_stack/strong_typing/slots.py rename to src/llama-stack-api/llama_stack_api/strong_typing/slots.py diff --git a/src/llama_stack/strong_typing/topological.py b/src/llama-stack-api/llama_stack_api/strong_typing/topological.py similarity index 100% rename from src/llama_stack/strong_typing/topological.py rename to src/llama-stack-api/llama_stack_api/strong_typing/topological.py diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama-stack-api/llama_stack_api/tools.py similarity index 95% rename from src/llama_stack/apis/tools/tools.py rename to src/llama-stack-api/llama_stack_api/tools.py index 4e7cf2544..6571c2047 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama-stack-api/llama_stack_api/tools.py @@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel from typing_extensions import runtime_checkable -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.content_types import URL, InterleavedContent +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama-stack-api/llama_stack_api/vector_io.py similarity index 98% rename from src/llama_stack/apis/vector_io/vector_io.py rename to src/llama-stack-api/llama_stack_api/vector_io.py index 699241128..053e569f4 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama-stack-api/llama_stack_api/vector_io.py @@ -13,12 +13,12 @@ 
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body, Query
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
-from llama_stack.strong_typing.schema import register_schema
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.inference import InterleavedContent
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.strong_typing.schema import register_schema
+from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api.version import LLAMA_STACK_API_V1
 @json_schema_type
diff --git a/src/llama_stack/apis/vector_stores/vector_stores.py b/src/llama-stack-api/llama_stack_api/vector_stores.py
similarity index 96%
rename from src/llama_stack/apis/vector_stores/vector_stores.py
rename to src/llama-stack-api/llama_stack_api/vector_stores.py
index 524624028..0a1e6c53c 100644
--- a/src/llama_stack/apis/vector_stores/vector_stores.py
+++ b/src/llama-stack-api/llama_stack_api/vector_stores.py
@@ -8,7 +8,7 @@
 from typing import Literal
 from pydantic import BaseModel
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 # Internal resource type for storing the vector store routing and other information
diff --git a/src/llama_stack/apis/version.py b/src/llama-stack-api/llama_stack_api/version.py
similarity index 100%
rename from src/llama_stack/apis/version.py
rename to src/llama-stack-api/llama_stack_api/version.py
diff --git a/src/llama-stack-api/pyproject.toml b/src/llama-stack-api/pyproject.toml
new file mode 100644
index 000000000..a00472d36
--- /dev/null
+++ b/src/llama-stack-api/pyproject.toml
@@ -0,0 +1,82 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.uv]
+required-version = ">=0.7.0"
+
+[project]
+name = "llama-stack-api"
+version = "0.1.0"
+authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
+description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
+readme = "README.md"
+requires-python = ">=3.12"
+license = { "text" = "MIT" }
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Information Technology",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+]
+dependencies = [
+    "pydantic>=2.11.9",
+    "jsonschema",
+    "opentelemetry-sdk>=1.30.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.30.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/llamastack/llama-stack"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["llama_stack_api", "llama_stack_api.*"]
+
+[tool.setuptools.package-data]
+llama_stack_api = ["py.typed"]
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+    "UP",      # pyupgrade
+    "B",       # flake8-bugbear
+    "B9",      # flake8-bugbear subset
+    "C",       # comprehensions
+    "E",       # pycodestyle
+    "F",       # Pyflakes
+    "N",       # Naming
+    "W",       # Warnings
+    "DTZ",     # datetime rules
+    "I",       # isort (imports order)
+    "RUF001",  # Checks for ambiguous Unicode characters in strings
+    "RUF002",  # Checks for ambiguous Unicode characters in docstrings
+    "RUF003",  # Checks for ambiguous Unicode characters in comments
+    "PLC2401", # Checks for the use of non-ASCII characters in variable names
+]
+ignore = [
+    # The following ignores are desired by the project maintainers.
+    "E402", # Module level import not at top of file
+    "E501", # Line too long
+    "F405", # Maybe undefined or defined from star import
+    "C408", # Ignored because we like the dict keyword argument syntax
+    "N812", # Ignored because import torch.nn.functional as F is PyTorch convention
+
+    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
+    "C901", # Complexity of the function is too high
+]
+unfixable = [
+    "PLE2515",
+] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually
+
+[tool.ruff.lint.per-file-ignores]
+"llama_stack_api/apis/**/__init__.py" = ["F403"]
+
+[tool.ruff.lint.pep8-naming]
+classmethod-decorators = ["classmethod", "pydantic.field_validator"]
diff --git a/src/llama_stack/apis/agents/__init__.py b/src/llama_stack/apis/agents/__init__.py
deleted file mode 100644
index 6416b283b..000000000
--- a/src/llama_stack/apis/agents/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .agents import *
diff --git a/src/llama_stack/apis/batches/__init__.py b/src/llama_stack/apis/batches/__init__.py
deleted file mode 100644
index 9ce7d3d75..000000000
--- a/src/llama_stack/apis/batches/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .batches import Batches, BatchObject, ListBatchesResponse
-
-__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
diff --git a/src/llama_stack/apis/benchmarks/__init__.py b/src/llama_stack/apis/benchmarks/__init__.py
deleted file mode 100644
index 62d1b367c..000000000
--- a/src/llama_stack/apis/benchmarks/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .benchmarks import *
diff --git a/src/llama_stack/apis/common/__init__.py b/src/llama_stack/apis/common/__init__.py
deleted file mode 100644
index 756f351d8..000000000
--- a/src/llama_stack/apis/common/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
diff --git a/src/llama_stack/apis/conversations/__init__.py b/src/llama_stack/apis/conversations/__init__.py
deleted file mode 100644
index b6ddc5999..000000000
--- a/src/llama_stack/apis/conversations/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -from .conversations import ( - Conversation, - ConversationDeletedResource, - ConversationItem, - ConversationItemCreateRequest, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - Metadata, -) - -__all__ = [ - "Conversation", - "ConversationDeletedResource", - "ConversationItem", - "ConversationItemCreateRequest", - "ConversationItemDeletedResource", - "ConversationItemList", - "Conversations", - "Metadata", -] diff --git a/src/llama_stack/apis/datasetio/__init__.py b/src/llama_stack/apis/datasetio/__init__.py deleted file mode 100644 index 8c087bfa4..000000000 --- a/src/llama_stack/apis/datasetio/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasetio import * diff --git a/src/llama_stack/apis/datasets/__init__.py b/src/llama_stack/apis/datasets/__init__.py deleted file mode 100644 index 9c9a128d2..000000000 --- a/src/llama_stack/apis/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasets import * diff --git a/src/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py deleted file mode 100644 index ae01c5dfc..000000000 --- a/src/llama_stack/apis/datatypes.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, EnumMeta - -from pydantic import BaseModel, Field - -from llama_stack.schema_utils import json_schema_type - - -class DynamicApiMeta(EnumMeta): - def __new__(cls, name, bases, namespace): - # Store the original enum values - original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} - - # Create the enum class - cls = super().__new__(cls, name, bases, namespace) - - # Store the original values for reference - cls._original_values = original_values - # Initialize _dynamic_values - cls._dynamic_values = {} - - return cls - - def __call__(cls, value): - try: - return super().__call__(value) - except ValueError as e: - # If this value was already dynamically added, return it - if value in cls._dynamic_values: - return cls._dynamic_values[value] - - # If the value doesn't exist, create a new enum member - # Create a new member name from the value - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return the existing member - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Instead of creating a new member, raise ValueError to force users to use Api.add() to - # register new APIs explicitly - raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e - - def __iter__(cls): - # Allow iteration over both static and dynamic members - yield from super().__iter__() - if hasattr(cls, "_dynamic_values"): - yield from cls._dynamic_values.values() - - def add(cls, value): - """ - Add a new API to the enum. - Used to register external APIs. 
- """ - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return it - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Create a new enum member - member = object.__new__(cls) - member._name_ = member_name - member._value_ = value - - # Add it to the enum class - cls._member_map_[member_name] = member - cls._member_names_.append(member_name) - cls._member_type_ = str - - # Store it in our dynamic values - cls._dynamic_values[value] = member - - return member - - -@json_schema_type -class Api(Enum, metaclass=DynamicApiMeta): - """Enumeration of all available APIs in the Llama Stack system. - :cvar providers: Provider management and configuration - :cvar inference: Text generation, chat completions, and embeddings - :cvar safety: Content moderation and safety shields - :cvar agents: Agent orchestration and execution - :cvar batches: Batch processing for asynchronous API requests - :cvar vector_io: Vector database operations and queries - :cvar datasetio: Dataset input/output operations - :cvar scoring: Model output evaluation and scoring - :cvar eval: Model evaluation and benchmarking framework - :cvar post_training: Fine-tuning and model training - :cvar tool_runtime: Tool execution and management - :cvar telemetry: Observability and system monitoring - :cvar models: Model metadata and management - :cvar shields: Safety shield implementations - :cvar datasets: Dataset creation and management - :cvar scoring_functions: Scoring function definitions - :cvar benchmarks: Benchmark suite management - :cvar tool_groups: Tool group organization - :cvar files: File storage and management - :cvar prompts: Prompt versions and management - :cvar inspect: Built-in system inspection and introspection - """ - - providers = "providers" - inference = "inference" - safety = "safety" - agents = "agents" - batches = "batches" - vector_io = "vector_io" - datasetio = "datasetio" - scoring = "scoring" - eval = "eval" - post_training = "post_training" - tool_runtime = "tool_runtime" - - models = "models" - shields = "shields" - vector_stores = "vector_stores" # only used for routing table - datasets = "datasets" - scoring_functions = "scoring_functions" - benchmarks = "benchmarks" - tool_groups = "tool_groups" - files = "files" - prompts = "prompts" - conversations = "conversations" - - # built-in API - inspect = "inspect" - - -@json_schema_type -class Error(BaseModel): - """ - Error response from the API. Roughly follows RFC 7807. 
- - :param status: HTTP status code - :param title: Error title, a short summary of the error which is invariant for an error type - :param detail: Error detail, a longer human-readable description of the error - :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error - """ - - status: int - title: str - detail: str - instance: str | None = None - - -class ExternalApiSpec(BaseModel): - """Specification for an external API implementation.""" - - module: str = Field(..., description="Python module containing the API implementation") - name: str = Field(..., description="Name of the API") - pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") - protocol: str = Field(..., description="Name of the protocol class for the API") diff --git a/src/llama_stack/apis/eval/__init__.py b/src/llama_stack/apis/eval/__init__.py deleted file mode 100644 index 28a1d6049..000000000 --- a/src/llama_stack/apis/eval/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval import * diff --git a/src/llama_stack/apis/files/__init__.py b/src/llama_stack/apis/files/__init__.py deleted file mode 100644 index 189e4de19..000000000 --- a/src/llama_stack/apis/files/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .files import * diff --git a/src/llama_stack/apis/inference/__init__.py b/src/llama_stack/apis/inference/__init__.py deleted file mode 100644 index f0c8783c1..000000000 --- a/src/llama_stack/apis/inference/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inference import * diff --git a/src/llama_stack/apis/inspect/__init__.py b/src/llama_stack/apis/inspect/__init__.py deleted file mode 100644 index 016937e3d..000000000 --- a/src/llama_stack/apis/inspect/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inspect import * diff --git a/src/llama_stack/apis/models/__init__.py b/src/llama_stack/apis/models/__init__.py deleted file mode 100644 index ee90106b6..000000000 --- a/src/llama_stack/apis/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import * diff --git a/src/llama_stack/apis/post_training/__init__.py b/src/llama_stack/apis/post_training/__init__.py deleted file mode 100644 index 695575a30..000000000 --- a/src/llama_stack/apis/post_training/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import * diff --git a/src/llama_stack/apis/prompts/__init__.py b/src/llama_stack/apis/prompts/__init__.py deleted file mode 100644 index 6070f3450..000000000 --- a/src/llama_stack/apis/prompts/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .prompts import ListPromptsResponse, Prompt, Prompts - -__all__ = ["Prompt", "Prompts", "ListPromptsResponse"] diff --git a/src/llama_stack/apis/providers/__init__.py b/src/llama_stack/apis/providers/__init__.py deleted file mode 100644 index e35e2fe47..000000000 --- a/src/llama_stack/apis/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .providers import * diff --git a/src/llama_stack/apis/safety/__init__.py b/src/llama_stack/apis/safety/__init__.py deleted file mode 100644 index d93bc1355..000000000 --- a/src/llama_stack/apis/safety/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .safety import * diff --git a/src/llama_stack/apis/scoring/__init__.py b/src/llama_stack/apis/scoring/__init__.py deleted file mode 100644 index 624b9e704..000000000 --- a/src/llama_stack/apis/scoring/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring import * diff --git a/src/llama_stack/apis/scoring_functions/__init__.py b/src/llama_stack/apis/scoring_functions/__init__.py deleted file mode 100644 index fc1de0311..000000000 --- a/src/llama_stack/apis/scoring_functions/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring_functions import * diff --git a/src/llama_stack/apis/shields/__init__.py b/src/llama_stack/apis/shields/__init__.py deleted file mode 100644 index 783a4d124..000000000 --- a/src/llama_stack/apis/shields/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import * diff --git a/src/llama_stack/apis/tools/__init__.py b/src/llama_stack/apis/tools/__init__.py deleted file mode 100644 index b25310ecf..000000000 --- a/src/llama_stack/apis/tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .rag_tool import * -from .tools import * diff --git a/src/llama_stack/apis/vector_io/__init__.py b/src/llama_stack/apis/vector_io/__init__.py deleted file mode 100644 index 3f4c60805..000000000 --- a/src/llama_stack/apis/vector_io/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_io import * diff --git a/src/llama_stack/apis/vector_stores/__init__.py b/src/llama_stack/apis/vector_stores/__init__.py deleted file mode 100644 index 8fc34058a..000000000 --- a/src/llama_stack/apis/vector_stores/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_stores import * diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 18141be5f..50fe394fc 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,6 +9,7 @@ import sys from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType @@ -21,7 +22,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index cc1ca051b..0a4e22b09 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,6 +11,7 @@ from functools import lru_cache from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( @@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index fb3a22109..27ded7ede 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,6 +6,7 @@ import sys +from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint @@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api log = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 49e01794e..41acacdb5 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,11 +12,10 @@ from enum import Enum from typing import Any, Union, get_args, get_origin import httpx +from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from 
termcolor import cprint -from llama_stack.providers.datatypes import RemoteProviderConfig - _CLIENT_CLASSES = {} diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 5d4a54184..bdb3b9734 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,6 +6,8 @@ import textwrap from typing import Any +from llama_stack_api import Api, ProviderSpec + from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, @@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, ProviderSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index f83834522..b94cd4fdd 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,9 +8,7 @@ import secrets import time from typing import Any, Literal -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.conversations.conversations import ( +from llama_stack_api import ( Conversation, ConversationDeletedResource, ConversationItem, @@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) +from pydantic import BaseModel, TypeAdapter + from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 2182ea4e5..4231363b6 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,22 +9,34 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse +from llama_stack_api import ( + Api, + Benchmark, + BenchmarkInput, + Dataset, + DatasetInput, + DatasetIO, + Eval, + Inference, + Model, + ModelInput, + ProviderSpec, + Resource, + Safety, + Scoring, + ScoringFn, + ScoringFnInput, + Shield, + ShieldInput, + ToolGroup, + ToolGroupInput, + ToolRuntime, + VectorIO, + VectorStore, + VectorStoreInput, +) from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset, DatasetInput -from llama_stack.apis.eval import Eval -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Model, ModelInput -from llama_stack.apis.resource import Resource -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput -from llama_stack.apis.shields import Shield, ShieldInput -from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.log 
import LoggingConfig -from llama_stack.providers.datatypes import Api, ProviderSpec LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 9be5ffb49..162f9f2b0 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,17 +10,17 @@ import os from typing import Any import yaml -from pydantic import BaseModel - -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.external import load_external_apis -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) +from pydantic import BaseModel + +from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.external import load_external_apis +from llama_stack.log import get_logger logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index 12e9824ad..ce0c7eb72 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,8 +6,8 @@ import yaml +from llama_stack_api import Api, ExternalApiSpec -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 07b51128f..53ddd3475 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,19 +6,19 @@ from importlib.metadata import version -from pydantic import BaseModel - -from llama_stack.apis.inspect import ( +from llama_stack_api import ( HealthInfo, + HealthStatus, Inspect, ListRoutesResponse, RouteInfo, VersionInfo, ) +from pydantic import BaseModel + from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.external import load_external_apis from llama_stack.core.server.routes import get_all_api_routes -from llama_stack.providers.datatypes import HealthStatus class DistributionInspectConfig(BaseModel): diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index db990368b..959284720 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse +from llama_stack_api import is_unwrapped_body_param try: from llama_stack_client import ( @@ -57,7 +58,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook from llama_stack.log import get_logger, setup_logging -from llama_stack.strong_typing.inspection import is_unwrapped_body_param logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index 1a6f38cb5..d9532b978 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,9 +7,9 @@ import json from typing import Any +from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel -from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import 
StackRunConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 7095ffd18..7337d9e35 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,11 +7,10 @@ import asyncio from typing import Any +from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel -from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus from .datatypes import StackRunConfig from .utils.config import redact_sensitive_fields diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 8bf371fed..ca154fbc6 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,29 +8,46 @@ import importlib.metadata import inspect from typing import Any -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.datatypes import ExternalApiSpec -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InferenceProvider -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers as ProvidersAPI -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api import ( + LLAMA_STACK_API_V1ALPHA, + Agents, + Api, + Batches, + Benchmarks, + BenchmarksProtocolPrivate, + Conversations, + DatasetIO, + Datasets, + DatasetsProtocolPrivate, + Eval, + ExternalApiSpec, + Files, + Inference, + InferenceProvider, + Inspect, + Models, + ModelsProtocolPrivate, + PostTraining, + Prompts, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + Safety, + Scoring, + ScoringFunctions, + ScoringFunctionsProtocolPrivate, + Shields, + ShieldsProtocolPrivate, + ToolGroups, + ToolGroupsProtocolPrivate, + ToolRuntime, + VectorIO, + VectorStore, +) +from llama_stack_api import ( + Providers as ProvidersAPI, +) + from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -44,18 +61,6 @@ from llama_stack.core.external import load_external_apis from llama_stack.core.store import DistributionRegistry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - Api, - BenchmarksProtocolPrivate, - DatasetsProtocolPrivate, - ModelsProtocolPrivate, - ProviderSpec, - RemoteProviderConfig, - RemoteProviderSpec, - ScoringFunctionsProtocolPrivate, - ShieldsProtocolPrivate, - ToolGroupsProtocolPrivate, -) logger = 
get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 729d1c9ea..c2d051422 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -6,13 +6,14 @@ from typing import Any +from llama_stack_api import Api, RoutingTable + from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, ) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry -from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py index 2f1d5f78e..dcf247874 100644 --- a/src/llama_stack/core/routers/datasets.py +++ b/src/llama_stack/core/routers/datasets.py @@ -6,11 +6,9 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import DatasetPurpose, DataSource +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py index ffca81bf0..cbbbf5cc5 100644 --- a/src/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,15 +6,19 @@ from typing import Any -from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack.apis.scoring import ( +from llama_stack_api import ( + BenchmarkConfig, + Eval, + EvaluateResponse, + Job, + RoutingTable, ScoreBatchResponse, ScoreResponse, Scoring, ScoringFnParams, ) + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index d6270d428..a538ab02e 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -11,17 +11,19 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Body -from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam -from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, Inference, ListOpenAIChatCompletionResponse, + ModelNotFoundError, + ModelType, + ModelTypeError, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, @@ -35,18 +37,17 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, RerankResponse, + RoutingTable, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, -) -from llama_stack.apis.models import 
ModelType +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import TypeAdapter + from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span from llama_stack.log import get_logger from llama_stack.models.llama.llama3.chat_format import ChatFormat from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index e5ff2ada9..f85bbb767 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,13 +6,10 @@ from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield +from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield + from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index fb13d94a4..984a8e2a7 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -6,13 +6,12 @@ from typing import Any -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( URL, -) -from llama_stack.apis.tools import ( ListToolDefsResponse, ToolRuntime, ) + from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index ed5fb8253..bfd090e32 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -9,14 +9,16 @@ import uuid from typing import Annotated, Any from fastapi import Body - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.models import ModelType -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + HealthResponse, + HealthStatus, + InterleavedContent, + ModelType, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, + RoutingTable, SearchRankingOptions, VectorIO, VectorStoreChunkingStrategy, @@ -33,9 +35,9 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py index 8c87d395d..66830bc41 100644 --- a/src/llama_stack/core/routing_tables/benchmarks.py +++ b/src/llama_stack/core/routing_tables/benchmarks.py 
@@ -6,7 +6,8 @@ from typing import Any -from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse + from llama_stack.core.datatypes import ( BenchmarkWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py index d6faf93c5..cfbafc9a8 100644 --- a/src/llama_stack/core/routing_tables/common.py +++ b/src/llama_stack/core/routing_tables/common.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import Model -from llama_stack.apis.resource import ResourceType +from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable + from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.access_control.datatypes import Action from llama_stack.core.datatypes import ( @@ -21,7 +20,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, RoutingTable logger = get_logger(name=__name__, category="core::routing_tables") diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py index b129c9ec5..c49c9769b 100644 --- a/src/llama_stack/core/routing_tables/datasets.py +++ b/src/llama_stack/core/routing_tables/datasets.py @@ -7,18 +7,19 @@ import uuid from typing import Any -from llama_stack.apis.common.errors import DatasetNotFoundError -from llama_stack.apis.datasets import ( +from llama_stack_api import ( Dataset, + DatasetNotFoundError, DatasetPurpose, Datasets, DatasetType, DataSource, ListDatasetsResponse, + ResourceType, RowsDataSource, URIDataSource, ) -from llama_stack.apis.resource import ResourceType + from llama_stack.core.datatypes import ( DatasetWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 1fb1186cd..e1210a139 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -7,8 +7,16 @@ import time from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel +from llama_stack_api import ( + ListModelsResponse, + Model, + ModelNotFoundError, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) + from llama_stack.core.datatypes import ( ModelWithOwner, RegistryEntrySource, diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py index 520f07014..66165ac2f 100644 --- a/src/llama_stack/core/routing_tables/scoring_functions.py +++ b/src/llama_stack/core/routing_tables/scoring_functions.py @@ -4,14 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( ListScoringFunctionsResponse, + ParamType, + ResourceType, ScoringFn, ScoringFnParams, ScoringFunctions, ) + from llama_stack.core.datatypes import ( ScoringFnWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py index b1918d20a..0f981c49d 100644 --- a/src/llama_stack/core/routing_tables/shields.py +++ b/src/llama_stack/core/routing_tables/shields.py @@ -6,8 +6,8 @@ from typing import Any -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields +from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields + from llama_stack.core.datatypes import ( ShieldWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py index 2d47bbb17..a552cb96e 100644 --- a/src/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -6,9 +6,16 @@ from typing import Any -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ToolGroupNotFoundError -from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups +from llama_stack_api import ( + URL, + ListToolDefsResponse, + ListToolGroupsResponse, + ToolDef, + ToolGroup, + ToolGroupNotFoundError, + ToolGroups, +) + from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner from llama_stack.log import get_logger diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index e77739abe..f95463b3c 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -6,12 +6,12 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.models import ModelType -from llama_stack.apis.resource import ResourceType - # Removed VectorStores import to avoid exposing public API -from llama_stack.apis.vector_io.vector_io import ( +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + ResourceType, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -22,6 +22,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import ( VectorStoreWithOwner, ) diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py index da398bf99..a7f5d7916 100644 --- a/src/llama_stack/core/server/auth_providers.py +++ b/src/llama_stack/core/server/auth_providers.py @@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse import httpx import jwt +from llama_stack_api import TokenValidationError from pydantic import BaseModel, Field -from llama_stack.apis.common.errors import TokenValidationError from llama_stack.core.datatypes import ( AuthenticationConfig, CustomAuthConfig, diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 4f7ff2295..e7a84937d 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -10,11 +10,10 @@ from collections.abc import Callable 
from typing import Any from aiohttp import hdrs +from llama_stack_api import Api, ExternalApiSpec, WebMethod from starlette.routing import Route -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.resolver import api_protocol_map -from llama_stack.schema_utils import WebMethod EndpointFunc = Callable[..., Any] PathParams = dict[str, str] diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 5bf876c02..8116348ec 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -28,11 +28,10 @@ from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse +from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError from openai import BadRequestError from pydantic import BaseModel, ValidationError -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import ( AuthenticationRequiredError, @@ -58,7 +57,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import LoggingConfig, get_logger, setup_logging -from llama_stack.providers.datatypes import Api from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 2ed0eccd2..674c35f31 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -12,27 +12,31 @@ import tempfile from typing import Any import yaml +from llama_stack_api import ( + Agents, + Api, + Batches, + Benchmarks, + Conversations, + DatasetIO, + Datasets, + Eval, + Files, + Inference, + Inspect, + Models, + PostTraining, + Prompts, + Providers, + Safety, + Scoring, + ScoringFunctions, + Shields, + ToolGroups, + ToolRuntime, + VectorIO, +) -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry @@ -54,7 +58,6 @@ from 
llama_stack.core.storage.datatypes import ( from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py index 459c1aa1a..1a56277ea 100644 --- a/src/llama_stack/core/telemetry/telemetry.py +++ b/src/llama_stack/core/telemetry/telemetry.py @@ -16,6 +16,7 @@ from typing import ( cast, ) +from llama_stack_api import json_schema_type, register_schema from opentelemetry import metrics, trace from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -28,7 +29,6 @@ from pydantic import BaseModel, Field from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Primitive -from llama_stack.schema_utils import json_schema_type, register_schema ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py index 88e72688f..fd76e3ccb 100644 --- a/src/llama_stack/distributions/dell/dell.py +++ b/src/llama_stack/distributions/dell/dell.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..67af0e92a 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -6,7 +6,8 @@ from pathlib import Path -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..59deca6d0 100644 --- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -5,8 +5,8 @@ # the root directory of this source tree. 
-from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models import ModelType +from llama_stack_api import DatasetPurpose, ModelType, URIDataSource + from llama_stack.core.datatypes import ( BenchmarkInput, BuildProvider, diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 7b7773289..1a8126290 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -7,6 +7,8 @@ from typing import Any +from llama_stack_api import RemoteProviderSpec + from llama_stack.core.datatypes import ( BuildProvider, Provider, @@ -19,7 +21,6 @@ from llama_stack.core.datatypes import ( ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings -from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index e6813806a..faf5fb085 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -10,10 +10,9 @@ from typing import Any, Literal import jinja2 import rich import yaml +from llama_stack_api import DatasetPurpose, ModelType from pydantic import BaseModel, Field -from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 880e0b680..025fcc676 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -5,25 +5,26 @@ # the root directory of this source tree. 
-from llama_stack.apis.agents import ( +from llama_stack_api import ( Agents, + Conversations, + Inference, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseObject, + OpenAIResponsePrompt, + OpenAIResponseText, Order, + ResponseGuardrail, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.agents.agents import ResponseGuardrail -from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.inference import ( - Inference, -) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.datatypes import AccessRule from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index ed7f959c0..347eeef78 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -8,14 +8,15 @@ import time import uuid from collections.abc import AsyncIterator -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ConversationItem, + Conversations, + Inference, + InvalidConversationIdError, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, @@ -25,20 +26,16 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponsePrompt, OpenAIResponseText, OpenAIResponseTextFormat, -) -from llama_stack.apis.common.errors import ( - InvalidConversationIdError, -) -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.conversations.conversations import ConversationItem -from llama_stack.apis.inference import ( - Inference, - OpenAIMessageParam, OpenAISystemMessageParam, + Order, + ResponseGuardrailSpec, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO +from pydantic import BaseModel, TypeAdapter + from llama_stack.log import get_logger from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index c16bc8df3..6a791e92d 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -8,10 +8,18 @@ import uuid from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( AllowedToolsFilter, ApprovalFilter, + Inference, MCPListToolsTool, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + 
OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionToolCall, + OpenAIChoice, + OpenAIMessageParam, OpenAIResponseContentPartOutputText, OpenAIResponseContentPartReasoningText, OpenAIResponseContentPartRefusal, @@ -56,16 +64,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseUsageOutputTokensDetails, WebSearchToolTypes, ) -from llama_stack.apis.inference import ( - Inference, - OpenAIAssistantMessageParam, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAIChatCompletionRequestWithExtraBody, - OpenAIChatCompletionToolCall, - OpenAIChoice, - OpenAIMessageParam, -) + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -1023,9 +1022,9 @@ class StreamingResponseOrchestrator: self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput] ) -> AsyncIterator[OpenAIResponseObjectStream]: """Process all tools and emit appropriate streaming events.""" + from llama_stack_api import ToolDef from openai.types.chat import ChatCompletionToolParam - from llama_stack.apis.tools import ToolDef from llama_stack.models.llama.datatypes import ToolDefinition from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 09a161d50..38fb2a94f 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -9,7 +9,12 @@ import json from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ImageContentItem, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIImageURL, OpenAIResponseInputToolFileSearch, OpenAIResponseInputToolMCP, OpenAIResponseObjectStreamResponseFileSearchCallCompleted, @@ -23,22 +28,15 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectStreamResponseWebSearchCallSearching, OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFileSearchToolCallResults, - OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, -) -from llama_stack.apis.common.content_types import ( - ImageContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIImageURL, OpenAIToolMessageParam, + TextContentItem, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger @@ -398,6 +396,10 @@ class ToolExecutor: # Build output message message: Any if mcp_tool_to_server and function.name in mcp_tool_to_server: + from llama_stack_api import ( + OpenAIResponseOutputMessageMCPCall, + ) + message = OpenAIResponseOutputMessageMCPCall( id=item_id, arguments=function.arguments, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py 
b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py index 3b9a14b01..35ad03378 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py @@ -7,10 +7,10 @@ from dataclasses import dataclass from typing import cast -from openai.types.chat import ChatCompletionToolParam -from pydantic import BaseModel - -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIChatCompletionToolCall, + OpenAIMessageParam, + OpenAIResponseFormatParam, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseInputToolFileSearch, @@ -26,7 +26,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTool, OpenAIResponseToolMCP, ) -from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam +from openai.types.chat import ChatCompletionToolParam +from pydantic import BaseModel class ToolExecutionResult(BaseModel): diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 26af1d595..943bbae41 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -9,9 +9,23 @@ import re import uuid from collections.abc import Sequence -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, OpenAIResponseAnnotationFileCitation, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, OpenAIResponseInput, OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputMessageContent, @@ -27,28 +41,12 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPListTools, OpenAIResponseText, -) -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIChatCompletionToolCallFunction, - OpenAIChoice, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAIJSONSchema, - OpenAIMessageParam, - OpenAIResponseFormatJSONObject, - OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatParam, - OpenAIResponseFormatText, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResponseGuardrailSpec, + Safety, ) -from llama_stack.apis.safety import Safety async def convert_chat_choice_to_response_message( diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py index f0ae51423..dd90ac298 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -6,8 +6,8 @@ import asyncio -from llama_stack.apis.inference import OpenAIMessageParam -from 
llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel +from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py index a8ae92eb2..27d0f4213 100644 --- a/src/llama_stack/providers/inline/batches/reference/__init__.py +++ b/src/llama_stack/providers/inline/batches/reference/__init__.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models +from llama_stack_api import Files, Inference, Models + from llama_stack.core.datatypes import AccessRule, Api from llama_stack.providers.utils.kvstore import kvstore_impl diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py index 7c4358b84..f0f8da96c 100644 --- a/src/llama_stack/providers/inline/batches/reference/batches.py +++ b/src/llama_stack/providers/inline/batches/reference/batches.py @@ -13,25 +13,29 @@ import uuid from io import BytesIO from typing import Any, Literal -from openai.types.batch import BatchError, Errors -from pydantic import BaseModel - -from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Batches, + BatchObject, + ConflictError, + Files, Inference, + ListBatchesResponse, + Models, OpenAIAssistantMessageParam, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAIDeveloperMessageParam, OpenAIEmbeddingsRequestWithExtraBody, + OpenAIFilePurpose, OpenAIMessageParam, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResourceNotFoundError, ) -from llama_stack.apis.models import Models +from openai.types.batch import BatchError, Errors +from pydantic import BaseModel + from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py index e8ebeb30d..1fcfbbef4 100644 --- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -5,10 +5,8 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset -from llama_stack.providers.datatypes import DatasetsProtocolPrivate +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse + from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py index 5ddbd56c5..e6020e8a3 100644 --- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -6,26 +6,29 @@ import json from typing import Any -from tqdm import tqdm - -from llama_stack.apis.agents import Agents -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, Inference, + Job, + JobStatus, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAISystemMessageParam, OpenAIUserMessageParam, + Scoring, ) -from llama_stack.apis.scoring import Scoring -from llama_stack.providers.datatypes import BenchmarksProtocolPrivate +from tqdm import tqdm + from llama_stack.providers.utils.common.data_schema_validator import ColumnName from llama_stack.providers.utils.kvstore import kvstore_impl -from .....apis.common.job_types import Job, JobStatus -from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse from .config import MetaReferenceEvalConfig EVAL_TASKS_PREFIX = "benchmarks:" diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py index a76b982ce..5e8c887f1 100644 --- a/src/llama_stack/providers/inline/files/localfs/files.py +++ b/src/llama_stack/providers/inline/files/localfs/files.py @@ -10,17 +10,17 @@ from pathlib import Path from typing import Annotated from fastapi import Depends, File, Form, Response, UploadFile - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import ( +from llama_stack_api import ( ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, OpenAIFileObject, OpenAIFilePurpose, + Order, + ResourceNotFoundError, ) + from llama_stack.core.datatypes import AccessRule from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py index 961548f9c..802e79f15 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py @@ -6,9 +6,9 @@ from typing import Any +from llama_stack_api import QuantizationConfig from pydantic import BaseModel, field_validator -from llama_stack.apis.inference import QuantizationConfig from llama_stack.providers.utils.inference import supported_inference_models diff --git 
a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py index 51a2ddfad..2155a1ae8 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -8,9 +8,7 @@ import math from typing import Optional import torch -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData - -from llama_stack.apis.inference import ( +from llama_stack_api import ( GreedySamplingStrategy, JsonSchemaResponseFormat, OpenAIChatCompletionRequestWithExtraBody, @@ -20,6 +18,8 @@ from llama_stack.apis.inference import ( SamplingParams, TopPSamplingStrategy, ) +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData + from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py index ef21132a0..753185fe7 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -9,22 +9,23 @@ import time import uuid from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, + Model, + ModelsProtocolPrivate, + ModelType, OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionUsage, OpenAIChoice, + OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIUserMessageParam, ToolChoice, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.apis.models import Model, ModelType + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat @@ -40,7 +41,6 @@ from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal -from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) @@ -376,7 +376,7 @@ class MetaReferenceInferenceImpl( # Convert tool calls to OpenAI format openai_tool_calls = None if decoded_message.tool_calls: - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, ) @@ -441,13 +441,14 @@ class MetaReferenceInferenceImpl( params: OpenAIChatCompletionRequestWithExtraBody, ) -> AsyncIterator[OpenAIChatCompletionChunk]: """Stream chat completion chunks as they're generated.""" - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionChunk, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, OpenAIChoiceDelta, OpenAIChunkChoice, ) + from llama_stack.models.llama.datatypes 
import StopReason from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index e6dcf3ae7..14c9a41a4 100644 --- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -6,19 +6,19 @@ from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, -) -from llama_stack.apis.inference.inference import ( + Model, + ModelsProtocolPrivate, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, OpenAICompletion, + OpenAICompletionRequestWithExtraBody, ) -from llama_stack.apis.models import ModelType + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py index 950b75f86..7a85d0e03 100644 --- a/src/llama_stack/providers/inline/post_training/common/validator.py +++ b/src/llama_stack/providers/inline/post_training/common/validator.py @@ -12,11 +12,8 @@ from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - DialogType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, DialogType, StringType + from llama_stack.providers.utils.common.data_schema_validator import ( ColumnName, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py index 22ace1ae0..f3f3d8d56 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py @@ -6,11 +6,11 @@ from enum import Enum from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -19,6 +19,7 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) + from llama_stack.providers.inline.post_training.huggingface.config import ( HuggingFacePostTrainingConfig, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py index 39b83a3fd..58a30618c 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -12,20 +12,20 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + TrainingConfig, +) from peft import LoraConfig from 
transformers import ( AutoTokenizer, ) from trl import SFTConfig, SFTTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py index 11d707df9..f7dc3ebf2 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -11,18 +11,18 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DatasetIO, + Datasets, + DPOAlignmentConfig, + TrainingConfig, +) from transformers import ( AutoTokenizer, ) from trl import DPOConfig, DPOTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DPOAlignmentConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py index a930602d0..86c3c3f52 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Protocol import psutil import torch from datasets import Dataset +from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig from transformers import AutoConfig, AutoModelForCausalLM if TYPE_CHECKING: @@ -34,8 +35,6 @@ class HFAutoModel(Protocol): def save_pretrained(self, save_directory: str | Path) -> None: ... 
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.post_training import Checkpoint, TrainingConfig
 from llama_stack.log import get_logger

 from .config import HuggingFacePostTrainingConfig
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index f0fa052a2..1483b8385 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,6 +13,7 @@
 from collections.abc import Callable

 import torch
+from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
@@ -21,7 +22,6 @@
 from torchtune.models.llama3_1 import lora_llama3_1_8b
 from torchtune.models.llama3_2 import lora_llama3_2_3b
 from torchtune.modules.transforms import Transform

-from llama_stack.apis.post_training import DatasetFormat
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import Model
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 765f6789d..3370d42fa 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,11 +6,11 @@
 from enum import Enum
 from typing import Any

-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.post_training import (
+from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
+    DatasetIO,
+    Datasets,
     DPOAlignmentConfig,
     JobStatus,
     ListPostTrainingJobsResponse,
@@ -20,6 +20,7 @@ from llama_stack.apis.post_training import (
     PostTrainingJobStatusResponse,
     TrainingConfig,
 )
+
 from llama_stack.providers.inline.post_training.torchtune.config import (
     TorchtunePostTrainingConfig,
 )
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index c648cdc46..2bf1d0fe7 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,6 +12,17 @@ from pathlib import Path
 from typing import Any

 import torch
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingMetric,
+    QATFinetuningConfig,
+    TrainingConfig,
+)
 from torch import nn
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader, DistributedSampler
@@ -32,17 +43,6 @@ from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup
 from torchtune.training.metric_logging import DiskLogger
 from tqdm import tqdm

-from llama_stack.apis.common.training_types import PostTrainingMetric
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.post_training import (
-    Checkpoint,
-    DataConfig,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    QATFinetuningConfig,
-    TrainingConfig,
-)
 from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.core.utils.model_utils import model_local_dir
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 7da9ea0d7..80e907c10 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,15 +10,17 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult

-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack_api import (
+    ModerationObject,
+    ModerationObjectResults,
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
     ViolationLevel,
 )
-from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack.apis.shields import Shield
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 6f6346e82..36e4280b9 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,26 +9,27 @@ import uuid
 from string import Template
 from typing import Any

-from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    ImageContentItem,
     Inference,
+    ModerationObject,
+    ModerationObjectResults,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAIUserMessageParam,
-)
-from llama_stack.apis.safety import (
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
+    TextContentItem,
     ViolationLevel,
 )
-from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack.apis.shields import Shield
+
 from llama_stack.core.datatypes import Api
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Role
 from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index 2015e1150..b4f495f19 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,21 +7,21 @@
 from typing import Any

 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack_api import (
+    ModerationObject,
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ShieldStore,
     ViolationLevel,
 )
-from llama_stack.apis.safety.safety import ModerationObject
-from llama_stack.apis.shields import Shield
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
 from llama_stack.core.utils.model_utils import model_local_dir
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str

 from .config import PromptGuardConfig, PromptGuardType
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index b19b68039..326fd9211 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,17 +5,19 @@
 # the root directory of this source tree.
 from typing import Any

-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+
 from llama_stack.core.datatypes import Api
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
 from llama_stack.providers.utils.common.data_schema_validator import (
     get_valid_schemas,
     validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index b87974d08..93c2627dd 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,8 +8,8 @@ import json
 import re
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 60804330f..382c64d88 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,8 +6,8 @@
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
index aad3dfe26..a7305d13a 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
index 9b24ff791..f7d2f32ae 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
index adca0791d..a2ed1d695 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
index 8b1bf5352..4e2b49a1f 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index ea04331c9..df0cf52d9 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
index 9cae66fa6..1f143c4a6 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 77f6176e6..4ec85bb09 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,8 +6,8 @@
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.ifeval import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index d765959a8..4e9d49e96 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,8 +5,8 @@
 # the root directory of this source tree.
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index cb336e303..7f213b38c 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,8 +6,8 @@
 import re
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.regex_parser_multiple_choice_answer import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index d6e10e6c9..b291924d5 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,8 +6,8 @@
 from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 14810f706..cbab93c74 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,21 +17,22 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
-from pydantic import BaseModel
-
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
     ScoringResultRow,
 )
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from pydantic import BaseModel
+
 from llama_stack.core.datatypes import Api
 from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
 from llama_stack.providers.utils.common.data_schema_validator import (
     get_valid_schemas,
     validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index 4fe07f822..b058305b4 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
index a1995cc4e..d619d38a8 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
index e8fe15259..34354a1fc 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index d9b129a8b..4092ccc4a 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
index c1d7e855b..2b32b9eec 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
index 01ddd0dd0..4d6547002 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
index 55d89344a..739dfd7bd 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
index c621ecf7f..59ed5949b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
index 2e85c0c7c..96c36d226 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index 9b7628524..aa636d2b3 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,18 +5,20 @@
 # the root directory of this source tree.
 from typing import Any

-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
+    Inference,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+
 from llama_stack.core.datatypes import Api
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
 from llama_stack.providers.utils.common.data_schema_validator import (
     get_valid_schemas,
     validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 074f1ff46..ed26169a5 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     LLMAsJudgeScoringFnParams,
+    NumberType,
     ScoringFn,
 )

diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index 205e0bbf3..bffffd878 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -4,8 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
+from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn

 llm_as_judge_base = ScoringFn(
     identifier="llm-as-judge::base",
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index fbecb6e20..169a4d8b7 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any

-from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index f9a7e7b89..60117dc3d 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import RagToolRuntimeConfig
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index 14cbec49d..f499989cb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,15 +6,16 @@

 from jinja2 import Template
-
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack.apis.tools.rag_tool import (
+from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
+    InterleavedContent,
     LLMRAGQueryGeneratorConfig,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
     RAGQueryGenerator,
     RAGQueryGeneratorConfig,
 )
+
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index 6a59be0ca..aacb7bb38 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,34 +12,31 @@
 from typing import Any

 import httpx
 from fastapi import UploadFile
-from pydantic import TypeAdapter
-
-from llama_stack.apis.common.content_types import (
+from llama_stack_api import (
     URL,
+    Files,
+    Inference,
     InterleavedContent,
     InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.files import Files, OpenAIFilePurpose
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.tools import (
     ListToolDefsResponse,
+    OpenAIFilePurpose,
+    QueryChunksResponse,
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,
+    TextContentItem,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
-)
-from llama_stack.apis.vector_io import (
-    QueryChunksResponse,
     VectorIO,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
+from pydantic import TypeAdapter
+
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.vector_store import parse_data_url
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
index 575e5ad88..155b8a0cb 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import ChromaVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index 1798f10de..d955b1d06 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index 24d1f292a..b834589e3 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd7a7aeca..dd433f818 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel

 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 96760b834..abef42499 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,15 +12,22 @@ from typing import Any

 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from llama_stack_api import (
+    Chunk,
+    Files,
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray

-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
index 7dc9c6a33..2f84769f3 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import MilvusVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index b333b04ea..08d05c991 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
index bef6d50e6..145d19455 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import QdrantVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index e7ecde7b7..437d643f0 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel

 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index df96e927c..e84c299dc 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,7 +6,7 @@

 from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

 from .config import SQLiteVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 399800d3e..e979ff323 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,15 +12,19 @@ from typing import Any

 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray

-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index 1845d6f46..bd204cecd 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,11 +5,12 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
+
 from llama_stack.providers.utils.kvstore import kvstore_dependencies


diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
index a07942486..e11bb8332 100644
--- a/src/llama_stack/providers/registry/batches.py
+++ b/src/llama_stack/providers/registry/batches.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
index a9feb0bac..bfd7ede3c 100644
--- a/src/llama_stack/providers/registry/datasetio.py
+++ b/src/llama_stack/providers/registry/datasetio.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
index 4ef0bb41f..9c8b1eebd 100644
--- a/src/llama_stack/providers/registry/eval.py
+++ b/src/llama_stack/providers/registry/eval.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec


 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index 3f5949ba2..dfc527816 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,7 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 3cbfd408b..819e5aff5 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
index 2092e3b2d..a5529b714 100644
--- a/src/llama_stack/providers/registry/post_training.py
+++ b/src/llama_stack/providers/registry/post_training.py
@@ -7,7 +7,7 @@

 from typing import cast

-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec

 # We provide two versions of these providers so that distributions can package the appropriate version of torch.
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
index b30074398..c9dbbce24 100644
--- a/src/llama_stack/providers/registry/safety.py
+++ b/src/llama_stack/providers/registry/safety.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
index a4ec54ed2..45c5dbed7 100644
--- a/src/llama_stack/providers/registry/scoring.py
+++ b/src/llama_stack/providers/registry/scoring.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 39dc7fccd..3f0a83a30 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,12 +5,13 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
     RemoteProviderSpec,
 )
+
 from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS


diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
index 55b302751..a00941586 100644
--- a/src/llama_stack/providers/registry/vector_io.py
+++ b/src/llama_stack/providers/registry/vector_io.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
@@ -244,7 +244,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:

 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index a34e354bf..1260ce644 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,8 @@
 from typing import Any
 from urllib.parse import parse_qs, urlparse

-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset
-from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
+
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index f723c92cc..cb674b0d7 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,11 +7,7 @@
 from typing import Any

 import aiohttp
-
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType

 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index 8fc7ffdd3..fbdec0d4d 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,18 +6,24 @@
 from typing import Any

 import requests
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
+    Inference,
+    Job,
+    JobStatus,
+    Scoring,
+    ScoringResult,
+)

-from llama_stack.apis.agents import Agents
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import Scoring, ScoringResult
-from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

-from .....apis.common.job_types import Job, JobStatus
-from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
 from .config import NVIDIAEvalConfig

 DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index c5d4194df..bbd630977 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,17 +8,17 @@ from datetime import UTC, datetime
 from typing import Annotated, Any

 from fastapi import Depends, File, Form, Response, UploadFile
-
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
+
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.providers.utils.files.form_data import parse_expires_after
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 76261bdf4..14f1e3852 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,16 +17,17 @@ from fastapi import Depends, File, Form, Response, UploadFile
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client

-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
+
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.providers.utils.files.form_data import parse_expires_after
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 31e6aa12b..7ee4c54e2 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 7c31df7a6..596f6c234 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class AzureProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1bf44b51a..1a9fe533b 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,9 +6,7 @@

 from collections.abc import AsyncIterator, Iterable

-from openai import AuthenticationError
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from openai import AuthenticationError
+
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index d5def9da1..c7f3111f9 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,10 +6,11 @@

 from urllib.parse import urljoin

-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index 9ba773724..a1fd41e2d 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 84357f764..4974593d2 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 636241383..8b802379f 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,8 +7,8 @@
 from collections.abc import Iterable

 from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody

-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index 20ba99606..d786655eb 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
index df5da29a2..6c25c005c 100644
--- a/src/llama_stack/providers/remote/inference/gemini/config.py
+++ b/src/llama_stack/providers/remote/inference/gemini/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class GeminiProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index ee960d13b..79d694f06 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,12 +6,13 @@

 from typing import Any

-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import GeminiConfig
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index c1aedca3e..cec327716 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class GroqProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index 4b5750ed4..c16311830 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class LlamaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 05d6e8cc8..1dea3e3cb 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
index b4926f33e..b89b2a750 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/__init__.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference

 from .config import NVIDIAConfig
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index 618bbe078..6ff98d290 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class NVIDIAProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index bc5aa7953..9e4c6f559 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,16 +8,15 @@

 from collections.abc import Iterable

 import aiohttp
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py
index 280a8c1d2..b7d6125f3 100644
--- a/src/llama_stack/providers/remote/inference/oci/__init__.py
+++ b/src/llama_stack/providers/remote/inference/oci/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider

 from .config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
index 9747b08ea..24b4ad926 100644
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class OCIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 253dcf2b6..36e56cf6c 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,15 +10,15 @@ from typing import Any

 import httpx
 import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient

-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 50f36d045..6a471429e 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,15 +7,15 @@

 import asyncio

-from ollama import AsyncClient as AsyncOllamaClient
-
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index 36c66bd28..cbb01b2d0 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class OpenAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index eca28a86a..7045dbf2e 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 3c56acfbd..19cf0c5d7 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,10 +6,9 @@

 from collections.abc import AsyncIterator

-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
+
 from llama_stack.core.request_headers import NeedsRequestProviderData

 from .config import PassthroughImplConfig
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index a2a1add97..aaa4230a8 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class RunpodProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index a76e941cb..4596b2df5 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,11 +6,12 @@

 from collections.abc import AsyncIterator

-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import RunpodImplConfig
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index f63210434..6d72e7205 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@

 from typing import Any

+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


 class SambaNovaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 47952abba..051a2afa3 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ 
-5,10 +5,10 @@ # the root directory of this source tree. +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 6ae7b2544..831a26e39 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -8,12 +8,12 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi -from pydantic import SecretStr - -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) +from pydantic import SecretStr + from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py index 47392c8e7..96c0538e3 100644 --- a/src/llama_stack/providers/remote/inference/together/config.py +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py index 963b384a0..f1355a760 100644 --- a/src/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -8,15 +8,15 @@ from collections.abc import Iterable from typing import Any, cast +from llama_stack_api import ( + Model, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, +) from together import AsyncTogether # type: ignore[import-untyped] from together.constants import BASE_URL # type: ignore[import-untyped] -from llama_stack.apis.inference import ( - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) -from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage -from llama_stack.apis.models import Model from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py index 5f2efa894..53e2b3e65 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/config.py +++ b/src/llama_stack/providers/remote/inference/vertexai/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type class VertexAIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py index e362aece6..23f713961 100644 --- 
a/src/llama_stack/providers/remote/inference/vllm/config.py +++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,10 +6,10 @@ from pathlib import Path +from llama_stack_api import json_schema_type from pydantic import Field, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index fa350ec48..f7938c22c 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,19 +7,17 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx -from pydantic import ConfigDict - -from llama_stack.apis.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) +from pydantic import ConfigDict + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, -) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import VLLMInferenceAdapterConfig diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index 8d8df13b4..1bba040ef 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,10 +7,10 @@ import os from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type class WatsonXProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index e71ffe5e1..de23c25d7 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -9,8 +9,9 @@ from typing import Any import litellm import requests - -from llama_stack.apis.inference.inference import ( +from llama_stack_api import ( + Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.apis.models import Model -from llama_stack.apis.models.models import ModelType + from llama_stack.core.telemetry.tracing import get_current_span from llama_stack.log import get_logger from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig @@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack.apis.inference import OpenAIEmbeddingUsage + from llama_stack_api import OpenAIEmbeddingUsage + from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md index 
83f20a44e..f998f44ba 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md @@ -128,7 +128,7 @@ client.post_training.job.cancel(job_uuid="your-job-id") #### 1. Register the model ```python -from llama_stack.apis.models import Model, ModelType +from llama_stack_api.models import Model, ModelType client.models.register( model_id="test-example-model@v1", diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index d839ffd6f..02c35241b 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,9 +8,7 @@ from datetime import datetime from typing import Any, Literal import aiohttp -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, @@ -19,6 +17,8 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) +from pydantic import BaseModel, ConfigDict + from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index 162951ff3..78762155d 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,9 +7,9 @@ import warnings from typing import Any +from llama_stack_api import TrainingConfig from pydantic import BaseModel -from llama_stack.apis.post_training import TrainingConfig from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 75f96816a..86b93c32e 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,16 +7,17 @@ import json from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.bedrock.client import create_bedrock_client from .config import BedrockSafetyConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index 1ca8d95cb..ca28924d4 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,8 +5,9 @@ # the root directory of this source tree. 
+from llama_stack_api import json_schema_type + from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md index af11b2539..f3ec0f1e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/README.md +++ b/src/llama_stack/providers/remote/safety/nvidia/README.md @@ -42,8 +42,8 @@ client.initialize() #### Create a safety shield ```python -from llama_stack.apis.safety import Shield -from llama_stack.apis.inference import Message +from llama_stack_api.safety import Shield +from llama_stack_api.inference import Message # Create a safety shield shield = Shield( diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index 1c618f4f4..fc686ae73 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,10 +6,9 @@ import os from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class NVIDIASafetyConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index 236f16207..b3b5090e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,12 +7,18 @@ from typing import Any import requests +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack.apis.shields import Shield from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import NVIDIASafetyConfig diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index 2cde97098..a8e745851 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,10 +6,9 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr -from llama_stack.schema_utils import json_schema_type - class SambaNovaProviderDataValidator(BaseModel): sambanova_api_key: str | None = Field( diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 72359badd..119ebb6ed 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,18 +8,18 @@ from typing import Any import litellm import requests - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.core.request_headers import NeedsRequestProviderData from 
llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import SambaNovaSafetyConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 9a98964b7..84e47dd4f 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 02e5b5c69..b7eee776a 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,18 +7,18 @@ from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BraveSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 578bb6d34..efb1eb2df 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,17 +7,18 @@ from typing import Any from urllib.parse import urlparse -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.datatypes import Api -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, + Api, ListToolDefsResponse, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools from .config import MCPProviderConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index ca629fced..d65d66e67 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, 
ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 410e34195..9cc865092 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index e4b77c68d..d774ea643 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 97e2244b8..eca5d349b 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,14 +9,19 @@ from typing import Any from urllib.parse import urlparse import chromadb +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index 209ba90bb..b1e4f9a4a 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git 
a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 526075bb2..1b703d486 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 8ff9e1328..2e2c788c7 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 73339b5be..b856bf918 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,16 +8,21 @@ import asyncio import os from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 8086b7650..36018fd95 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index d81e524e4..aeb1c83bb 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index cf10a0e01..8aa0303b6 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,18 +8,23 @@ import heapq from typing import Any import psycopg2 +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index e9527f101..b5b02fe59 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 01fbcc5cb..8cc4cbb2b 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 7d17c5591..53d6be2b6 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,23 +9,24 @@ import hashlib import uuid from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreChunkingStrategy, + VectorStoreFileObject, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategy, - VectorStoreFileObject, -) -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 12e11d013..47546d459 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 66dbf1fed..19f9679fb 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d200662da..c72666f63 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,19 +8,23 @@ from typing import Any import weaviate import weaviate.classes as wvc +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index b0305104f..7ef245779 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,11 +7,8 @@ from enum import Enum from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - CompletionInputType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType + from llama_stack.core.datatypes import Api diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 3d8fb6d85..21afbec2b 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,10 +7,9 @@ import json from fastapi import Request +from llama_stack_api import ExpiresAfter from pydantic import BaseModel, ValidationError -from llama_stack.apis.files import ExpiresAfter - async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/embedding_mixin.py 
b/src/llama_stack/providers/utils/inference/embedding_mixin.py index bab495eef..f7e5c711b 100644 --- a/src/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger if TYPE_CHECKING: from sentence_transformers import SentenceTransformer -from llama_stack.apis.inference import ( +from llama_stack_api import ( ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index a3a28aec0..3c707dd01 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,15 +6,15 @@ import asyncio from typing import Any -from sqlalchemy.exc import IntegrityError - -from llama_stack.apis.inference import ( +from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, OpenAICompletionWithInputMessages, OpenAIMessageParam, Order, ) +from sqlalchemy.exc import IntegrityError + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index a793c499e..4f468725b 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,8 +9,7 @@ import struct from collections.abc import AsyncIterator import litellm - -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -22,6 +21,7 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index 8a120b698..e7ca5ab74 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,12 +6,10 @@ from typing import Any +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr -from llama_stack.apis.common.errors import UnsupportedModelError -from llama_stack.apis.models import ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c2e6829e0..c97e42274 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,25 +20,23 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) +from llama_stack_api import ( + URL, + 
GreedySamplingStrategy, + ImageContentItem, + JsonSchemaResponseFormat, + OpenAIResponseFormatParam, + SamplingParams, + TextContentItem, + TopKSamplingStrategy, + TopPSamplingStrategy, + _URLOrData, +) from openai.types.chat import ( ChatCompletionMessageToolCall, ) from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - ImageContentItem, - TextContentItem, - _URLOrData, -) -from llama_stack.apis.inference import ( - GreedySamplingStrategy, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TopKSamplingStrategy, - TopPSamplingStrategy, -) from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( BuiltinTool, diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 09059da09..c05873df5 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,11 +10,9 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from openai import AsyncOpenAI -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.inference import ( +from llama_stack_api import ( Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -26,7 +24,9 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from llama_stack.apis.models import ModelType +from openai import AsyncOpenAI +from pydantic import BaseModel, ConfigDict + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index 35a7b3484..ea01a34e9 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,16 +12,11 @@ import re from typing import Any import httpx -from PIL import Image as PIL_Image - -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( + CompletionRequest, ImageContentItem, InterleavedContent, InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - CompletionRequest, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, @@ -32,8 +27,11 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, ResponseFormat, ResponseFormatType, + TextContentItem, ToolChoice, ) +from PIL import Image as PIL_Image + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( RawContent, diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py index 6a8b0a7cf..895268a4f 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class SqliteControlPlaneConfig(BaseModel): diff --git a/src/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py index 4c40056f3..6786293c6 100644 --- a/src/llama_stack/providers/utils/memory/file_utils.py +++ b/src/llama_stack/providers/utils/memory/file_utils.py @@ -8,7 +8,7 @@ import base64 import mimetypes import os -from llama_stack.apis.common.content_types import URL +from llama_stack_api import URL def data_url_from_file(file_path: str) -> URL: diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 853245598..68d1c11e5 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,16 +13,15 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + Files, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, + OpenAIFileObject, QueryChunksResponse, SearchRankingOptions, + VectorStore, VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, @@ -39,11 +38,13 @@ from llama_stack.apis.vector_io import ( VectorStoreFileStatus, VectorStoreListFilesResponse, VectorStoreListResponse, + VectorStoreNotFoundError, VectorStoreObject, VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from llama_stack.apis.vector_stores import VectorStore +from pydantic import TypeAdapter + from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 99f875227..37ac79039 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,20 +14,22 @@ from urllib.parse import unquote import httpx import numpy as np +from llama_stack_api import ( + URL, + Api, + Chunk, + ChunkMetadata, + InterleavedContent, + OpenAIEmbeddingsRequestWithExtraBody, + QueryChunksResponse, + RAGDocument, + VectorStore, +) from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, -) -from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) diff --git a/src/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py index 033022491..d1d9e36c5 100644 --- a/src/llama_stack/providers/utils/pagination.py +++ 
b/src/llama_stack/providers/utils/pagination.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack_api import PaginatedResponse def paginate_records( diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index fdca8ddee..c7dfed15a 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,18 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.agents import ( - Order, -) -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIResponseObjectWithInput, + Order, ) -from llama_stack.apis.inference import OpenAIMessageParam + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py index cff9a112f..aa6fe7248 100644 --- a/src/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -6,8 +6,7 @@ import statistics from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import AggregationFunctionType +from llama_stack_api import AggregationFunctionType, ScoringResultRow def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index 2fae177b7..d16c75263 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,8 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFn +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow + from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index bcd224234..033a00edc 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,10 +8,9 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol +from llama_stack_api import PaginatedResponse from pydantic import BaseModel -from llama_stack.apis.common.responses import PaginatedResponse - class ColumnType(Enum): INTEGER = "INTEGER" diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index cfc3131f4..263f5e69f 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,6 +6,7 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast +from llama_stack_api import 
PaginatedResponse from sqlalchemy import ( JSON, Boolean, @@ -26,7 +27,6 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index a271cb959..82c85f46c 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,17 +10,20 @@ from enum import Enum from typing import Any, cast import httpx +from llama_stack_api import ( + ImageContentItem, + InterleavedContentItem, + ListToolDefsResponse, + TextContentItem, + ToolDef, + ToolInvocationResult, + _URLOrData, +) from mcp import ClientSession, McpError from mcp import types as mcp_types from mcp.client.sse import sse_client from mcp.client.streamable_http import streamablehttp_client -from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem, _URLOrData -from llama_stack.apis.tools import ( - ListToolDefsResponse, - ToolDef, - ToolInvocationResult, -) from llama_stack.core.datatypes import AuthenticationRequiredError from llama_stack.log import get_logger from llama_stack.providers.utils.tools.ttl_dict import TTLDict diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py index e97a9d8fb..9c399b7bf 100644 --- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py +++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py @@ -6,9 +6,7 @@ from typing import Protocol -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec -from llama_stack.schema_utils import webmethod +from llama_stack_api import LLAMA_STACK_API_V1, Api, ProviderSpec, RemoteProviderSpec, webmethod def available_providers() -> list[ProviderSpec]: diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index 3ab8df3d9..b9c0ac916 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,8 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest - -from llama_stack.apis.files import OpenAIFilePurpose +from llama_stack_api import OpenAIFilePurpose class BatchHelper: diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index d9e8dd501..61878ac4c 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,8 +9,8 @@ from unittest.mock import patch import pytest import requests +from llama_stack_api import OpenAIFilePurpose -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.datatypes import User purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index 99aa75395..d007b57d6 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,14 +15,14 @@ that enables routing based on provider_data alone. 
from unittest.mock import AsyncMock, patch import pytest - -from llama_stack.apis.datatypes import Api -from llama_stack.apis.inference.inference import ( +from llama_stack_api import ( + Api, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionUsage, OpenAIChoice, ) + from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.core.telemetry.telemetry import MetricEvent diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index b5be71c7c..ff6925b58 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,8 +9,7 @@ import time import uuid import pytest - -from llama_stack.apis.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, DPOAlignmentConfig, @@ -18,6 +17,7 @@ from llama_stack.apis.post_training import ( LoraFinetuningConfig, TrainingConfig, ) + from llama_stack.log import get_logger # Configure logging diff --git a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json index 7ec2ac931..4e80e1cdd 100644 --- a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json +++ b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. 
* For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. ## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. 
We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json index a6c31dc72..a8e1e8611 100644 --- a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json +++ b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. 
All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json index b92c67940..dd7884012 100644 --- a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json +++ b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. 
Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 5a73bb044..99b4982f0 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,8 +12,8 @@ import warnings from collections.abc import Generator import pytest +from llama_stack_api import ViolationLevel -from llama_stack.apis.safety import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId # Llama Guard models available for text and vision shields diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6337abc9c..6a926f1d5 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,8 +7,7 @@ import base64 import mimetypes import pytest - -from llama_stack.apis.safety import ViolationLevel +from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index 7b3779e9e..b85a23263 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,8 +9,7 @@ import mimetypes import os import pytest - -from llama_stack.apis.safety import ViolationLevel +from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 4d532ed87..1b1b6ef28 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,8 +7,8 @@ import re import pytest +from llama_stack_api import ToolGroupNotFoundError -from llama_stack.apis.common.errors import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 1043d4903..c65dfecac 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,11 +8,10 @@ import time from io import BytesIO import pytest +from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError -from llama_stack.apis.files import ExpiresAfter -from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger @@ -646,7 +645,7 @@ def test_openai_vector_store_attach_file( ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter compat_client = compat_client_with_empty_stores @@ -710,7 +709,7 @@ def 
test_openai_vector_store_attach_files_on_creation( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create some files and attach them to the vector store valid_file_ids = [] @@ -775,7 +774,7 @@ def test_openai_vector_store_list_files( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -867,7 +866,7 @@ def test_openai_vector_store_retrieve_file_contents( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -928,7 +927,7 @@ def test_openai_vector_store_delete_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -994,7 +993,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1046,7 +1045,7 @@ def test_openai_vector_store_update_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1103,7 +1102,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( This test confirms that client.vector_stores.create() creates a unique ID """ skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter compat_client = compat_client_with_empty_stores diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index 1b2099069..acaa44bcb 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,8 +5,7 @@ # the root directory of this source tree. import pytest - -from llama_stack.apis.vector_io import Chunk +from llama_stack_api import Chunk from ..conftest import vector_provider_wrapper diff --git a/tests/unit/conversations/test_api_models.py b/tests/unit/conversations/test_api_models.py index 8416cba0b..f8576f076 100644 --- a/tests/unit/conversations/test_api_models.py +++ b/tests/unit/conversations/test_api_models.py @@ -5,11 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.conversations.conversations import ( - Conversation, - ConversationItem, - ConversationItemList, -) +from llama_stack_api import Conversation, ConversationItem, ConversationItemList def test_conversation_model_defaults(): diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 3f0175831..2f942eb9c 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -8,14 +8,11 @@ import tempfile from pathlib import Path import pytest +from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage from openai.types.conversations.conversation import Conversation as OpenAIConversation from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem from pydantic import TypeAdapter -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseInputMessageContentText, - OpenAIResponseMessage, -) from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py index bf195ff33..7e465513e 100644 --- a/tests/unit/core/routers/test_safety_router.py +++ b/tests/unit/core/routers/test_safety_router.py @@ -6,8 +6,8 @@ from unittest.mock import AsyncMock -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import ListShieldsResponse, Shield +from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield + from llama_stack.core.datatypes import SafetyConfig from llama_stack.core.routers.safety import SafetyRouter diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index f9bd84a37..071fbe6e7 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -7,8 +7,8 @@ from unittest.mock import AsyncMock, Mock import pytest +from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody -from llama_stack.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody from llama_stack.core.routers.vector_io import VectorIORouter diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index d28803006..acb31e1c9 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -9,12 +9,10 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield -from llama_stack.apis.models import ListModelsResponse, Model, ModelType -from llama_stack.apis.shields import ListShieldsResponse, Shield from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config -from llama_stack.providers.datatypes import Api class TestVectorStoresValidation: diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 8c1838ba3..2405d536e 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -9,15 +9,22 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import ( + URL, + Api, + Dataset, + DatasetPurpose, + 
ListToolDefsResponse, + Model, + ModelNotFoundError, + ModelType, + NumberType, + Shield, + ToolDef, + ToolGroup, + URIDataSource, +) -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import Model, ModelType -from llama_stack.apis.shields.shields import Shield -from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup from llama_stack.core.datatypes import RegistryEntrySource from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py index 2b7ce5c4e..f66b57df8 100644 --- a/tests/unit/distribution/test_api_recordings.py +++ b/tests/unit/distribution/test_api_recordings.py @@ -9,10 +9,9 @@ from pathlib import Path from unittest.mock import patch import pytest -from openai import AsyncOpenAI # Import the real Pydantic response types instead of using Mocks -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChoice, @@ -20,6 +19,8 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) +from openai import AsyncOpenAI + from llama_stack.testing.api_recorder import ( APIRecordingMode, ResponseStorage, diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 11f55cfdb..a27455e24 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -9,6 +9,7 @@ from unittest.mock import patch import pytest import yaml +from llama_stack_api import ProviderSpec from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig @@ -22,7 +23,6 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.datatypes import ProviderSpec class SampleConfig(BaseModel): @@ -312,7 +312,7 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.providers.datatypes import Api, ProviderSpec + from llama_stack_api import Api, ProviderSpec # Simulate a provider module with get_provider_spec fake_spec = ProviderSpec( @@ -395,8 +395,9 @@ pip_packages: def test_external_provider_from_module_building(self, mock_providers): """Test loading an external provider from a module during build (building=True, partial spec).""" + from llama_stack_api import Api + from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec - from llama_stack.providers.datatypes import Api # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec build_config = BuildConfig( @@ -456,8 +457,9 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec fake_spec = ProviderSpec( api=Api.inference, @@ 
-593,8 +595,9 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -641,8 +644,9 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -689,8 +693,9 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec # Module returns both inline and remote variants spec1 = ProviderSpec( @@ -828,8 +833,9 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec inference_spec = ProviderSpec( api=Api.inference, diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index 426e2cf64..080d1ddbe 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -6,10 +6,8 @@ import pytest +from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.inline.files.localfs import ( diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py index 89cb1af9d..3c93a578d 100644 --- a/tests/unit/providers/batches/test_reference.py +++ b/tests/unit/providers/batches/test_reference.py @@ -58,9 +58,7 @@ import json from unittest.mock import AsyncMock, MagicMock import pytest - -from llama_stack.apis.batches import BatchObject -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError +from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError class TestReferenceBatchesImpl: diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py index e6cb29b9b..4cd5d962d 100644 --- a/tests/unit/providers/batches/test_reference_idempotency.py +++ b/tests/unit/providers/batches/test_reference_idempotency.py @@ -43,8 +43,7 @@ Key Behaviors Tested: import asyncio import pytest - -from llama_stack.apis.common.errors import ConflictError +from llama_stack_api import ConflictError class TestReferenceBatchesIdempotency: diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py index 92a45a9f2..ae63c1a78 100644 --- a/tests/unit/providers/files/test_s3_files.py +++ 
b/tests/unit/providers/files/test_s3_files.py @@ -8,9 +8,7 @@ from unittest.mock import patch import pytest from botocore.exceptions import ClientError - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.files import OpenAIFilePurpose +from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError class TestS3FilesImpl: @@ -228,7 +226,7 @@ class TestS3FilesImpl: mock_now.return_value = 0 - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_expired_file" uploaded = await s3_provider.openai_upload_file( @@ -260,7 +258,7 @@ class TestS3FilesImpl: async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file): """Unsupported anchor value should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_unsupported_expires_after_anchor" @@ -273,7 +271,7 @@ class TestS3FilesImpl: async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file): """Non-integer seconds in expires_after should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_nonint_expires_after_seconds" @@ -286,7 +284,7 @@ class TestS3FilesImpl: async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file): """Seconds outside allowed range should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter with pytest.raises(ValueError, match="greater than or equal to 3600"): await s3_provider.openai_upload_file( diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py index 6097f2808..873db4e27 100644 --- a/tests/unit/providers/files/test_s3_files_auth.py +++ b/tests/unit/providers/files/test_s3_files_auth.py @@ -7,9 +7,8 @@ from unittest.mock import patch import pytest +from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.datatypes import User from llama_stack.providers.remote.files.s3.files import S3FilesImpl diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index fdd07c032..b3eecc558 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -8,9 +8,9 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody from openai import AuthenticationError -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index ffd45798e..e2a5455b7 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -9,8 +9,9 @@ import time from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( + 
HealthStatus, + Model, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionRequestWithExtraBody, @@ -20,10 +21,9 @@ from llama_stack.apis.inference import ( OpenAICompletionRequestWithExtraBody, ToolChoice, ) -from llama_stack.apis.models import Model + from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable -from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py index fff29928c..36d2b86a9 100644 --- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -7,8 +7,8 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import ToolDef -from llama_stack.apis.tools import ToolDef from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( convert_tooldef_to_chat_tool, ) diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py index b59636f7b..0d9f1cc35 100644 --- a/tests/unit/providers/nvidia/test_datastore.py +++ b/tests/unit/providers/nvidia/test_datastore.py @@ -8,9 +8,8 @@ import os from unittest.mock import patch import pytest +from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource -from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource -from llama_stack.apis.resource import ResourceType from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py index 86e005b76..c41379801 100644 --- a/tests/unit/providers/nvidia/test_eval.py +++ b/tests/unit/providers/nvidia/test_eval.py @@ -8,12 +8,18 @@ import os from unittest.mock import MagicMock, patch import pytest +from llama_stack_api import ( + Benchmark, + BenchmarkConfig, + EvaluateResponse, + Job, + JobStatus, + ModelCandidate, + ResourceType, + SamplingParams, + TopPSamplingStrategy, +) -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.common.job_types import Job, JobStatus -from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams -from llama_stack.apis.inference.inference import TopPSamplingStrategy -from llama_stack.apis.resource import ResourceType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index ad381da26..ba68a7abe 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -9,8 +9,7 @@ import warnings from unittest.mock import patch import pytest - -from llama_stack.apis.post_training.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, EfficiencyConfig, @@ -19,6 +18,7 @@ from 
llama_stack.apis.post_training.post_training import ( OptimizerType, TrainingConfig, ) + from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( NvidiaPostTrainingAdapter, diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 2793b5f44..8b313abcd 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest +from llama_stack_api import ModelType -from llama_stack.apis.models import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 622302630..ea6254841 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -9,14 +9,15 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIUserMessageParam, + ResourceType, + RunShieldResponse, + Shield, + ViolationLevel, ) -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.safety import RunShieldResponse, ViolationLevel -from llama_stack.apis.shields import Shield + from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 91148605d..4d0ce695b 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -9,8 +9,7 @@ import warnings from unittest.mock import patch import pytest - -from llama_stack.apis.post_training.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, LoraFinetuningConfig, @@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import ( QATFinetuningConfig, TrainingConfig, ) + from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( ListNvidiaPostTrainingJobs, diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py index 684fcf262..df7453712 100644 --- a/tests/unit/providers/test_bedrock.py +++ b/tests/unit/providers/test_bedrock.py @@ -7,7 +7,8 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, PropertyMock, patch -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody +from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 0b5ea078b..b9b59bb79 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ 
b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -10,10 +10,9 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch import pytest +from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam from pydantic import BaseModel, Field -from llama_stack.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.models import ModelType from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py index 62c8db74d..a7c9289d7 100644 --- a/tests/unit/providers/utils/inference/test_prompt_adapter.py +++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py @@ -4,10 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIUserMessageParam, -) +from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam + from llama_stack.models.llama.datatypes import RawTextItem from llama_stack.providers.utils.inference.prompt_adapter import ( convert_openai_message_to_raw_message, diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 590bdd1d2..00db5795a 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -7,9 +7,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from llama_stack_api import URL, RAGDocument, TextContentItem -from llama_stack.apis.common.content_types import URL, TextContentItem -from llama_stack.apis.tools import RAGDocument from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 04e75aa82..4a85cf8b8 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -34,8 +34,8 @@ # import pytest +from llama_stack_api import Model -from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 5e56ea417..216e9b8ea 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,9 +9,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter diff --git a/tests/unit/providers/vector_io/test_faiss.py 
b/tests/unit/providers/vector_io/test_faiss.py index 44bcd0cfd..0d5c1399f 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,11 +9,8 @@ from unittest.mock import MagicMock, patch import numpy as np import pytest +from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore -from llama_stack.apis.files import Files -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( FaissIndex, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 5ee62cd63..17a99ce1c 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,8 +8,8 @@ import asyncio import numpy as np import pytest +from llama_stack_api import Chunk, QueryChunksResponse -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 121623e1b..7ba40eefb 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -10,17 +10,17 @@ from unittest.mock import AsyncMock, patch import numpy as np import pytest - -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, + VectorStore, VectorStoreChunkingStrategyAuto, VectorStoreFileObject, + VectorStoreNotFoundError, ) -from llama_stack.apis.vector_stores import VectorStore + from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX # This test is a unit test for the inline VectorIO providers. 
This should only contain @@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): async def test_insert_chunks_with_missing_document_id(vector_io_adapter): """Ensure no KeyError when document_id is missing or in different places.""" - from llama_stack.apis.vector_io import Chunk, ChunkMetadata + from llama_stack_api import Chunk, ChunkMetadata fake_index = AsyncMock() vector_io_adapter.cache["db1"] = fake_index @@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter): async def test_document_id_with_invalid_type_raises_error(): """Ensure TypeError is raised when document_id is not a string.""" - from llama_stack.apis.vector_io import Chunk + from llama_stack_api import Chunk # Integer document_id should raise TypeError from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py index 1ca753a44..678b76fbd 100644 --- a/tests/unit/providers/vector_io/test_vector_utils.py +++ b/tests/unit/providers/vector_io/test_vector_utils.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack_api import Chunk, ChunkMetadata + from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id # This test is a unit test for the chunk_utils.py helpers. This should only contain diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index 8563d0d53..e3f5e46d7 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -7,13 +7,8 @@ from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig -from llama_stack.apis.tools.rag_tool import RAGQueryConfig -from llama_stack.apis.vector_io import ( - Chunk, - ChunkMetadata, - QueryChunksResponse, -) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 1f73fdb8e..23c12dcab 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -12,13 +12,8 @@ from unittest.mock import AsyncMock, MagicMock import numpy as np import pytest +from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument -from llama_stack.apis.inference.inference import ( - OpenAIEmbeddingData, - OpenAIEmbeddingsRequestWithExtraBody, -) -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk from llama_stack.providers.utils.memory.vector_store import ( URL, VectorStoreWithIndex, diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index d4c9786d1..01f486ab2 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -6,9 +6,8 @@ import pytest +from llama_stack_api import Model, VectorStore -from llama_stack.apis.inference import Model -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( @@ -304,7 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry): async 
def test_double_registration_with_cache(cached_disk_dist_registry): """Test double registration behavior with caching enabled.""" - from llama_stack.apis.models import ModelType + from llama_stack_api import ModelType + from llama_stack.core.datatypes import ModelWithOwner model1 = ModelWithOwner( diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 09b9a3cfb..2827f60b9 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,7 +5,8 @@ # the root directory of this source tree. -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ModelWithOwner, User from llama_stack.core.store.registry import CachedDiskDistributionRegistry diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index ea4f9b8b2..1df933d4d 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -8,10 +8,9 @@ from unittest.mock import MagicMock, Mock, patch import pytest import yaml +from llama_stack_api import Api, ModelType from pydantic import TypeAdapter, ValidationError -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import ModelType from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User from llama_stack.core.routing_tables.models import ModelsRoutingTable diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index cc9397f07..57a552514 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -144,7 +144,7 @@ def middleware_with_mocks(mock_auth_endpoint): middleware = AuthenticationMiddleware(mock_app, auth_config, {}) # Mock the route_impls to simulate finding routes with required scopes - from llama_stack.schema_utils import WebMethod + from llama_stack_api import WebMethod routes = { ("POST", "/test/scoped"): WebMethod(route="/test/scoped", method="POST", required_scope="test.read"), diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index b44f12f7e..071178f96 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,9 +9,9 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock +from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec from pydantic import BaseModel, Field -from llama_stack.apis.inference import Inference from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter @@ -25,7 +25,6 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec from llama_stack.providers.utils.kvstore import register_kvstore_backends from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index 0303a6ded..fdaf9022b 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -9,8 +9,8 @@ import logging # allow-direct-logging from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import PaginatedResponse -from llama_stack.apis.common.responses import PaginatedResponse from 
llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py index 8fe3103bc..79e0b6e28 100644 --- a/tests/unit/tools/test_tools_json_schema.py +++ b/tests/unit/tools/test_tools_json_schema.py @@ -9,9 +9,9 @@ Unit tests for JSON Schema-based tool definitions. Tests the new input_schema and output_schema fields. """ +from llama_stack_api import ToolDef from pydantic import ValidationError -from llama_stack.apis.tools import ToolDef from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index d2de1c759..4da20b125 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -7,14 +7,14 @@ import time import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChoice, OpenAIUserMessageParam, Order, ) + from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 34cff3d3f..1119a93d8 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -9,13 +9,8 @@ from tempfile import TemporaryDirectory from uuid import uuid4 import pytest +from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseInput, - OpenAIResponseObject, -) -from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @@ -46,7 +41,7 @@ def create_test_response_object( def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput: """Helper to create a test response input.""" - from llama_stack.apis.agents.openai_responses import OpenAIResponseMessage + from llama_stack_api import OpenAIResponseMessage return OpenAIResponseMessage( id=input_id, diff --git a/uv.lock b/uv.lock index 884d41b79..ddf8c1cd4 100644 --- a/uv.lock +++ b/uv.lock @@ -1945,6 +1945,7 @@ dependencies = [ { name = "httpx" }, { name = "jinja2" }, { name = "jsonschema" }, + { name = "llama-stack-api" }, { name = "openai" }, { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, @@ -2094,6 +2095,7 @@ requires-dist = [ { name = "httpx" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, + { name = "llama-stack-api", editable = "src/llama-stack-api" }, { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, @@ -2226,6 +2228,25 @@ unit = [ { name = "together" }, ] +[[package]] +name = "llama-stack-api" 
+version = "0.1.0" +source = { editable = "src/llama-stack-api" } +dependencies = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, + { name = "pydantic", specifier = ">=2.11.9" }, +] + [[package]] name = "llama-stack-client" version = "0.3.0" From 2441ca9389f3febabea2504daf8a68c31a00eb75 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 13 Nov 2025 13:16:02 -0800 Subject: [PATCH 25/62] fix(api): ensure openapi spec has deprecated routes (#4156) Deprecated doesn't mean it's "gone", it just means it is "going away" in the next major version of the package. --- client-sdks/stainless/openapi.yml | 371 ++++++++++++++++++ docs/openapi_generator/pyopenapi/generator.py | 4 +- docs/static/stainless-llama-stack-spec.yaml | 371 ++++++++++++++++++ 3 files changed, 744 insertions(+), 2 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 1be4af6c9..65a255c17 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -998,6 +998,39 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: true /v1/models/{model_id}: get: responses: @@ -1032,6 +1065,36 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: true /v1/moderations: post: responses: @@ -1662,6 +1725,32 @@ paths: description: List all scoring functions. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. 
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1693,6 +1782,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: true /v1/scoring/score: post: responses: @@ -1781,6 +1897,36 @@ paths: description: List all shields. parameters: [] deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: true /v1/shields/{identifier}: get: responses: @@ -1812,6 +1958,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: true /v1/tool-runtime/invoke: post: responses: @@ -1907,6 +2080,32 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true /v1/toolgroups/{toolgroup_id}: get: responses: @@ -1938,6 +2137,32 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. 
+ parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: true /v1/tools: get: responses: @@ -11420,6 +11645,152 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. 
+ additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index afbb5c710..9b5f76e2a 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -979,8 +979,8 @@ class Generator: if deprecated: filtered_operations.append(op) elif self.options.stability_filter == "stainless": - # Include both stable (v1 non-deprecated) and experimental (v1alpha, v1beta) endpoints - if (stability_level == "v1" and not deprecated) or stability_level in ["v1alpha", "v1beta"]: + # Include stable (v1), deprecated (v1 deprecated), and experimental (v1alpha, v1beta) endpoints + if stability_level == "v1" or stability_level in ["v1alpha", "v1beta"]: filtered_operations.append(op) operations = filtered_operations diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 1be4af6c9..65a255c17 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -998,6 +998,39 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: true /v1/models/{model_id}: get: responses: @@ -1032,6 +1065,36 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: true /v1/moderations: post: responses: @@ -1662,6 +1725,32 @@ paths: description: List all scoring functions. 
parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1693,6 +1782,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: true /v1/scoring/score: post: responses: @@ -1781,6 +1897,36 @@ paths: description: List all shields. parameters: [] deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: true /v1/shields/{identifier}: get: responses: @@ -1812,6 +1958,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: true /v1/tool-runtime/invoke: post: responses: @@ -1907,6 +2080,32 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. 
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true /v1/toolgroups/{toolgroup_id}: get: responses: @@ -1938,6 +2137,32 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: true /v1/tools: get: responses: @@ -11420,6 +11645,152 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. 
+ additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' From a82b79ce57fce407d4a980149a575e41f73d43b5 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 13 Nov 2025 16:43:31 -0500 Subject: [PATCH 26/62] fix: Error out when creating vector store with unknown embedding model (#4154) # What does this PR do? Error out when creating vector store with unknown embedding model Closes https://github.com/llamastack/llama-stack/issues/4047 ## Test Plan Added tests Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/routers/vector_io.py | 10 ++++++ tests/unit/core/routers/test_vector_io.py | 37 ++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index bfd090e32..47412c07f 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -14,7 +14,9 @@ from llama_stack_api import ( HealthResponse, HealthStatus, InterleavedContent, + ModelNotFoundError, ModelType, + ModelTypeError, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, @@ -124,6 +126,14 @@ class VectorIORouter(VectorIO): if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) + # Validate that embedding model exists and is of the correct type + if embedding_model is not None: + model = await self.routing_table.get_object_by_identifier("model", embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + # Auto-select provider if not specified if provider_id is None: num_providers = len(self.routing_table.impls_by_provider_id) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 071fbe6e7..03bc1ff5f 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ 
b/tests/unit/core/routers/test_vector_io.py @@ -7,7 +7,12 @@ from unittest.mock import AsyncMock, Mock import pytest -from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + OpenAICreateVectorStoreRequestWithExtraBody, +) from llama_stack.core.routers.vector_io import VectorIORouter @@ -21,6 +26,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) @@ -48,6 +54,7 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) router = VectorIORouter(mock_routing_table) request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( {"name": "test_store", "embedding_model": "all-MiniLM-L6-v2"} @@ -117,3 +124,31 @@ async def test_update_vector_store_same_provider_id_succeeds(): provider.openai_update_vector_store.assert_called_once_with( vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"} ) + + +async def test_create_vector_store_with_unknown_embedding_model_raises_error(): + """Test that creating a vector store with an unknown embedding model raises ModelNotFoundError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "unknown-model", "embedding_dimension": 384} + ) + + with pytest.raises(ModelNotFoundError, match="Model 'unknown-model' not found"): + await router.openai_create_vector_store(request) + + +async def test_create_vector_store_with_wrong_model_type_raises_error(): + """Test that creating a vector store with a non-embedding model raises ModelTypeError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.llm)) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "text-model", "embedding_dimension": 384} + ) + + with pytest.raises(ModelTypeError, match="Model 'text-model' is of type"): + await router.openai_create_vector_store(request) From ba744d791ad9cb6e7eccf2fd7128138c02cf7f58 Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Thu, 13 Nov 2025 14:21:03 -0800 Subject: [PATCH 27/62] fix: failure in responses during construct metrics (#4157) # What does this PR do? 
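For context, a minimal sketch of the failure mode (hypothetical, simplified signatures; only the `model_id` to `fully_qualified_model_id` rename comes from the actual `_construct_metrics()` change): a call site that keeps passing a keyword the signature no longer accepts raises `TypeError`, which the responses path surfaces as the `RuntimeError` shown below.

```python
# Minimal sketch of the bug class this patch fixes. The signatures here are
# hypothetical stand-ins; only the model_id -> fully_qualified_model_id
# rename mirrors the real InferenceRouter._construct_metrics() change.


def _construct_metrics(
    *,
    prompt_tokens: int,
    completion_tokens: int,
    total_tokens: int,
    fully_qualified_model_id: str,  # renamed; the old parameter was model_id
    provider_id: str,
) -> list[dict]:
    # Stand-in body; the real method builds MetricEvent objects.
    return [{"model": fully_qualified_model_id, "total_tokens": total_tokens}]


try:
    # The streaming call site still passed the old keyword name:
    _construct_metrics(
        prompt_tokens=10,
        completion_tokens=5,
        total_tokens=15,
        model_id="vllm/meta-llama/Llama-3.1-8B",  # stale keyword
        provider_id="vllm",
    )
except TypeError as exc:
    # prints: _construct_metrics() got an unexpected keyword argument 'model_id'
    print(exc)
```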
Without this change, we get the following error in the server logs:
```
RuntimeError: OpenAI response failed: InferenceRouter._construct_metrics() got an unexpected keyword argument 'model_id'
```
It seems the method signature was updated, but this call site was not.

## Test Plan
CI and a test with Sabre (agent framework integration)
---
 src/llama_stack/core/routers/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index a538ab02e..292a7c4bb 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -417,7 +417,7 @@ class InferenceRouter(Inference):
                         prompt_tokens=chunk.usage.prompt_tokens,
                         completion_tokens=chunk.usage.completion_tokens,
                         total_tokens=chunk.usage.total_tokens,
-                        model_id=fully_qualified_model_id,
+                        fully_qualified_model_id=fully_qualified_model_id,
                         provider_id=provider_id,
                     )
                     for metric in metrics:

From a078f089d9070d5618d185fb9dfdbc53f5e3c34f Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Thu, 13 Nov 2025 18:04:36 -0500
Subject: [PATCH 28/62] fix: rename llama_stack_api dir (#4155)

# What does this PR do?

The directory structure was `src/llama-stack-api/llama_stack_api`; instead it should just be `src/llama_stack_api`, to match the other packages. This updates the directory structure and the pyproject/linting config.

---------

Signed-off-by: Charlie Doern
Co-authored-by: Ashwin Bharambe
---
 .github/workflows/python-build-test.yml       |  4 +-
 .pre-commit-config.yaml                       |  2 +-
 pyproject.toml                                | 12 +-
 scripts/generate_prompt_format.py             |  2 +-
 src/llama_stack/cli/stack/_list_deps.py       |  2 +-
 src/llama_stack/cli/stack/utils.py            |  2 +-
 src/llama_stack/core/build.py                 |  2 +-
 src/llama_stack/core/client.py                |  3 +-
 src/llama_stack/core/configure.py             |  3 +-
 .../core/conversations/conversations.py       | 14 +-
 src/llama_stack/core/datatypes.py             | 18 +-
 src/llama_stack/core/distribution.py          | 10 +-
 src/llama_stack/core/external.py              |  2 +-
 src/llama_stack/core/inspect.py               | 10 +-
 src/llama_stack/core/library_client.py        |  1 +
 src/llama_stack/core/prompts/prompts.py       |  2 +-
 src/llama_stack/core/providers.py             |  2 +-
 src/llama_stack/core/resolver.py              | 27 +-
 src/llama_stack/core/routers/__init__.py      |  3 +-
 src/llama_stack/core/routers/datasets.py      |  3 +-
 src/llama_stack/core/routers/eval_scoring.py  |  3 +-
 src/llama_stack/core/routers/inference.py     | 20 +-
 src/llama_stack/core/routers/safety.py        |  3 +-
 src/llama_stack/core/routers/tool_runtime.py  |  3 +-
 src/llama_stack/core/routers/vector_io.py     |  6 +-
 .../core/routing_tables/benchmarks.py         |  3 +-
 src/llama_stack/core/routing_tables/common.py |  3 +-
 .../core/routing_tables/datasets.py           |  9 +-
 src/llama_stack/core/routing_tables/models.py | 15 +-
 .../core/routing_tables/scoring_functions.py  |  9 +-
 .../core/routing_tables/shields.py            |  3 +-
 .../core/routing_tables/toolgroups.py         |  5 +-
 .../core/routing_tables/vector_stores.py      | 10 +-
 src/llama_stack/core/server/auth_providers.py |  2 +-
 src/llama_stack/core/server/routes.py         |  2 +-
 src/llama_stack/core/server/server.py         |  2 +-
 src/llama_stack/core/stack.py                 | 44 +-
 src/llama_stack/core/telemetry/telemetry.py   |  2 +-
 src/llama_stack/distributions/dell/dell.py    |  3 +-
 .../meta-reference-gpu/meta_reference.py      |  3 +-
 .../open-benchmark/open_benchmark.py          |  3 +-
 .../distributions/starter/starter.py          |  3 +-
 src/llama_stack/distributions/template.py     |  2 +-
 .../inline/agents/meta_reference/agents.py    |  9 +-
 .../responses/openai_responses.py             | 14 +-
 .../meta_reference/responses/streaming.py     |  9 +-
.../meta_reference/responses/tool_executor.py | 5 +- .../agents/meta_reference/responses/types.py | 5 +- .../inline/agents/meta_reference/safety.py | 3 +- .../inline/batches/reference/__init__.py | 3 +- .../inline/batches/reference/batches.py | 10 +- .../inline/datasetio/localfs/datasetio.py | 3 +- .../inline/eval/meta_reference/eval.py | 8 +- .../providers/inline/files/localfs/files.py | 16 +- .../inline/inference/meta_reference/config.py | 2 +- .../inference/meta_reference/generators.py | 16 +- .../inference/meta_reference/inference.py | 38 +- .../sentence_transformers.py | 9 +- .../inline/post_training/common/validator.py | 3 +- .../huggingface/post_training.py | 11 +- .../recipes/finetune_single_device.py | 16 +- .../recipes/finetune_single_device_dpo.py | 14 +- .../inline/post_training/huggingface/utils.py | 3 +- .../post_training/torchtune/common/utils.py | 2 +- .../post_training/torchtune/post_training.py | 11 +- .../recipes/lora_finetuning_single_device.py | 22 +- .../safety/code_scanner/code_scanner.py | 9 +- .../inline/safety/llama_guard/llama_guard.py | 15 +- .../safety/prompt_guard/prompt_guard.py | 10 +- .../providers/inline/scoring/basic/scoring.py | 11 +- .../basic/scoring_fn/docvqa_scoring_fn.py | 3 +- .../basic/scoring_fn/equality_scoring_fn.py | 3 +- .../basic/scoring_fn/ifeval_scoring_fn.py | 3 +- .../regex_parser_math_response_scoring_fn.py | 3 +- .../scoring_fn/regex_parser_scoring_fn.py | 3 +- .../basic/scoring_fn/subset_of_scoring_fn.py | 3 +- .../inline/scoring/braintrust/braintrust.py | 20 +- .../inline/scoring/llm_as_judge/scoring.py | 11 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 3 +- .../tool_runtime/rag/context_retriever.py | 8 +- .../inline/tool_runtime/rag/memory.py | 10 +- .../inline/vector_io/chroma/config.py | 2 +- .../inline/vector_io/faiss/config.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 14 +- .../inline/vector_io/milvus/config.py | 2 +- .../inline/vector_io/qdrant/config.py | 2 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 20 +- src/llama_stack/providers/registry/agents.py | 3 +- src/llama_stack/providers/registry/files.py | 3 +- .../providers/registry/tool_runtime.py | 3 +- .../datasetio/huggingface/huggingface.py | 3 +- .../remote/datasetio/nvidia/datasetio.py | 1 + .../providers/remote/eval/nvidia/eval.py | 4 +- .../providers/remote/files/openai/files.py | 12 +- .../providers/remote/files/s3/files.py | 13 +- .../remote/inference/anthropic/config.py | 2 +- .../remote/inference/azure/config.py | 2 +- .../remote/inference/bedrock/bedrock.py | 10 +- .../remote/inference/cerebras/cerebras.py | 3 +- .../remote/inference/cerebras/config.py | 2 +- .../remote/inference/databricks/config.py | 2 +- .../remote/inference/databricks/databricks.py | 2 +- .../remote/inference/fireworks/config.py | 2 +- .../remote/inference/gemini/config.py | 2 +- .../remote/inference/gemini/gemini.py | 3 +- .../providers/remote/inference/groq/config.py | 2 +- .../inference/llama_openai_compat/config.py | 2 +- .../inference/llama_openai_compat/llama.py | 7 +- .../remote/inference/nvidia/config.py | 2 +- .../remote/inference/nvidia/nvidia.py | 6 +- .../providers/remote/inference/oci/config.py | 2 +- .../providers/remote/inference/oci/oci.py | 10 +- .../remote/inference/ollama/ollama.py | 10 +- .../remote/inference/openai/config.py | 2 +- .../remote/inference/passthrough/config.py | 2 +- .../inference/passthrough/passthrough.py | 6 +- .../remote/inference/runpod/config.py | 2 +- .../remote/inference/runpod/runpod.py | 3 +- 
.../remote/inference/sambanova/config.py | 2 +- .../providers/remote/inference/tgi/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 8 +- .../remote/inference/together/config.py | 2 +- .../remote/inference/together/together.py | 12 +- .../remote/inference/vertexai/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 2 +- .../providers/remote/inference/vllm/vllm.py | 8 +- .../remote/inference/watsonx/config.py | 2 +- .../remote/inference/watsonx/watsonx.py | 15 +- .../post_training/nvidia/post_training.py | 10 +- .../remote/post_training/nvidia/utils.py | 2 +- .../remote/safety/bedrock/bedrock.py | 5 +- .../providers/remote/safety/bedrock/config.py | 3 +- .../providers/remote/safety/nvidia/config.py | 3 +- .../providers/remote/safety/nvidia/nvidia.py | 4 +- .../remote/safety/sambanova/config.py | 3 +- .../remote/safety/sambanova/sambanova.py | 6 +- .../tool_runtime/bing_search/bing_search.py | 4 +- .../tool_runtime/brave_search/brave_search.py | 6 +- .../model_context_protocol.py | 7 +- .../tavily_search/tavily_search.py | 4 +- .../wolfram_alpha/wolfram_alpha.py | 4 +- .../remote/vector_io/chroma/chroma.py | 16 +- .../remote/vector_io/chroma/config.py | 2 +- .../remote/vector_io/milvus/config.py | 2 +- .../remote/vector_io/milvus/milvus.py | 22 +- .../remote/vector_io/pgvector/config.py | 2 +- .../remote/vector_io/pgvector/pgvector.py | 22 +- .../remote/vector_io/qdrant/config.py | 2 +- .../remote/vector_io/qdrant/qdrant.py | 18 +- .../remote/vector_io/weaviate/config.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 22 +- .../utils/common/data_schema_validator.py | 3 +- .../providers/utils/files/form_data.py | 3 +- .../utils/inference/inference_store.py | 10 +- .../utils/inference/litellm_openai_mixin.py | 14 +- .../utils/inference/model_registry.py | 2 +- .../utils/inference/openai_compat.py | 24 +- .../providers/utils/inference/openai_mixin.py | 16 +- .../utils/inference/prompt_adapter.py | 36 +- .../providers/utils/kvstore/sqlite/config.py | 3 +- .../utils/memory/openai_vector_store_mixin.py | 20 +- .../providers/utils/memory/vector_store.py | 18 +- .../utils/responses/responses_store.py | 7 +- .../utils/scoring/base_scoring_fn.py | 3 +- .../providers/utils/sqlstore/api.py | 3 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- src/llama_stack/providers/utils/tools/mcp.py | 16 +- .../README.md | 2 +- .../llama_stack_api/__init__.py | 2 +- .../llama_stack_api/agents.py | 0 .../llama_stack_api/batches.py | 0 .../llama_stack_api/benchmarks.py | 0 .../llama_stack_api/common/__init__.py | 0 .../llama_stack_api/common/content_types.py | 0 .../llama_stack_api/common/errors.py | 0 .../llama_stack_api/common/job_types.py | 0 .../llama_stack_api/common/responses.py | 0 .../llama_stack_api/common/tracing.py | 0 .../llama_stack_api/common/training_types.py | 0 .../llama_stack_api/common/type_system.py | 0 .../llama_stack_api/conversations.py | 0 .../llama_stack_api/datasetio.py | 0 .../llama_stack_api/datasets.py | 0 .../llama_stack_api/datatypes.py | 0 .../llama_stack_api/eval.py | 0 .../llama_stack_api/files.py | 0 .../llama_stack_api/inference.py | 0 .../llama_stack_api/inspect.py | 0 .../llama_stack_api/models.py | 0 .../llama_stack_api/openai_responses.py | 0 .../llama_stack_api/post_training.py | 0 .../llama_stack_api/prompts.py | 0 .../llama_stack_api/providers.py | 0 .../llama_stack_api/py.typed | 0 .../pyproject.toml | 2 +- .../llama_stack_api/rag_tool.py | 0 .../llama_stack_api/resource.py | 0 .../llama_stack_api/safety.py | 0 .../llama_stack_api/schema_utils.py 
| 0 .../llama_stack_api/scoring.py | 0 .../llama_stack_api/scoring_functions.py | 0 .../llama_stack_api/shields.py | 0 .../llama_stack_api/strong_typing/__init__.py | 0 .../strong_typing/auxiliary.py | 0 .../llama_stack_api/strong_typing/classdef.py | 0 .../llama_stack_api/strong_typing/core.py | 0 .../strong_typing/deserializer.py | 0 .../strong_typing/docstring.py | 0 .../strong_typing/exception.py | 0 .../strong_typing/inspection.py | 0 .../llama_stack_api/strong_typing/mapping.py | 0 .../llama_stack_api/strong_typing/name.py | 0 .../llama_stack_api/strong_typing/py.typed | 0 .../llama_stack_api/strong_typing/schema.py | 0 .../strong_typing/serialization.py | 0 .../strong_typing/serializer.py | 0 .../llama_stack_api/strong_typing/slots.py | 0 .../strong_typing/topological.py | 0 .../llama_stack_api/tools.py | 0 src/llama_stack_api/uv.lock | 498 ++++++++++++++++++ .../llama_stack_api/vector_io.py | 0 .../llama_stack_api/vector_stores.py | 0 .../llama_stack_api/version.py | 0 tests/integration/batches/conftest.py | 1 + tests/integration/files/test_files.py | 2 +- .../inference/test_provider_data_routing.py | 6 +- .../post_training/test_post_training.py | 4 +- tests/integration/safety/test_llama_guard.py | 2 +- tests/integration/safety/test_safety.py | 1 + .../integration/safety/test_vision_safety.py | 1 + .../tool_runtime/test_registration.py | 2 +- .../vector_io/test_openai_vector_stores.py | 2 +- tests/integration/vector_io/test_vector_io.py | 1 + .../unit/conversations/test_conversations.py | 2 +- tests/unit/core/routers/test_safety_router.py | 3 +- tests/unit/core/routers/test_vector_io.py | 7 +- tests/unit/core/test_stack_validation.py | 2 +- .../routers/test_routing_tables.py | 16 +- .../unit/distribution/test_api_recordings.py | 16 +- tests/unit/distribution/test_distribution.py | 20 +- tests/unit/files/test_files.py | 2 +- .../unit/providers/batches/test_reference.py | 1 + .../batches/test_reference_idempotency.py | 1 + tests/unit/providers/files/test_s3_files.py | 1 + .../providers/files/test_s3_files_auth.py | 2 +- .../inference/test_bedrock_adapter.py | 2 +- .../providers/inference/test_remote_vllm.py | 10 +- .../responses/test_streaming.py | 2 +- tests/unit/providers/nvidia/test_datastore.py | 2 +- tests/unit/providers/nvidia/test_eval.py | 8 +- .../unit/providers/nvidia/test_parameters.py | 12 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 6 +- .../nvidia/test_supervised_fine_tuning.py | 18 +- tests/unit/providers/test_bedrock.py | 3 +- .../utils/inference/test_openai_mixin.py | 2 +- .../utils/inference/test_prompt_adapter.py | 3 +- .../utils/memory/test_vector_store.py | 2 +- .../providers/utils/test_model_registry.py | 2 +- tests/unit/providers/vector_io/conftest.py | 2 +- tests/unit/providers/vector_io/test_faiss.py | 2 +- .../providers/vector_io/test_sqlite_vec.py | 2 +- .../test_vector_io_openai_vector_stores.py | 7 +- .../providers/vector_io/test_vector_utils.py | 3 +- tests/unit/rag/test_rag_query.py | 2 +- tests/unit/rag/test_vector_store.py | 2 +- tests/unit/registry/test_registry.py | 5 +- tests/unit/registry/test_registry_acl.py | 3 +- tests/unit/server/test_access_control.py | 2 +- tests/unit/server/test_resolver.py | 2 +- tests/unit/server/test_sse.py | 2 +- tests/unit/tools/test_tools_json_schema.py | 2 +- .../utils/inference/test_inference_store.py | 8 +- .../utils/responses/test_responses_store.py | 2 +- uv.lock | 8 +- 275 files changed, 1187 insertions(+), 745 deletions(-) rename src/{llama-stack-api => 
llama_stack_api}/README.md (98%) rename src/{llama-stack-api => }/llama_stack_api/__init__.py (99%) rename src/{llama-stack-api => }/llama_stack_api/agents.py (100%) rename src/{llama-stack-api => }/llama_stack_api/batches.py (100%) rename src/{llama-stack-api => }/llama_stack_api/benchmarks.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/__init__.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/content_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/errors.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/job_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/responses.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/tracing.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/training_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/type_system.py (100%) rename src/{llama-stack-api => }/llama_stack_api/conversations.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datasetio.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datasets.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datatypes.py (100%) rename src/{llama-stack-api => }/llama_stack_api/eval.py (100%) rename src/{llama-stack-api => }/llama_stack_api/files.py (100%) rename src/{llama-stack-api => }/llama_stack_api/inference.py (100%) rename src/{llama-stack-api => }/llama_stack_api/inspect.py (100%) rename src/{llama-stack-api => }/llama_stack_api/models.py (100%) rename src/{llama-stack-api => }/llama_stack_api/openai_responses.py (100%) rename src/{llama-stack-api => }/llama_stack_api/post_training.py (100%) rename src/{llama-stack-api => }/llama_stack_api/prompts.py (100%) rename src/{llama-stack-api => }/llama_stack_api/providers.py (100%) rename src/{llama-stack-api => }/llama_stack_api/py.typed (100%) rename src/{llama-stack-api => llama_stack_api}/pyproject.toml (99%) rename src/{llama-stack-api => }/llama_stack_api/rag_tool.py (100%) rename src/{llama-stack-api => }/llama_stack_api/resource.py (100%) rename src/{llama-stack-api => }/llama_stack_api/safety.py (100%) rename src/{llama-stack-api => }/llama_stack_api/schema_utils.py (100%) rename src/{llama-stack-api => }/llama_stack_api/scoring.py (100%) rename src/{llama-stack-api => }/llama_stack_api/scoring_functions.py (100%) rename src/{llama-stack-api => }/llama_stack_api/shields.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/__init__.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/auxiliary.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/classdef.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/core.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/deserializer.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/docstring.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/exception.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/inspection.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/mapping.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/name.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/py.typed (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/schema.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/serialization.py (100%) rename src/{llama-stack-api => 
}/llama_stack_api/strong_typing/serializer.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/slots.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/topological.py (100%) rename src/{llama-stack-api => }/llama_stack_api/tools.py (100%) create mode 100644 src/llama_stack_api/uv.lock rename src/{llama-stack-api => }/llama_stack_api/vector_io.py (100%) rename src/{llama-stack-api => }/llama_stack_api/vector_stores.py (100%) rename src/{llama-stack-api => }/llama_stack_api/version.py (100%) diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index b0f2c6e69..b58f4eb69 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -31,7 +31,7 @@ jobs: version: 0.7.6 - name: Build Llama Stack API package - working-directory: src/llama-stack-api + working-directory: src/llama_stack_api run: uv build - name: Build Llama Stack package @@ -39,7 +39,7 @@ jobs: - name: Install Llama Stack package (with api stubs from local build) run: | - uv pip install --find-links src/llama-stack-api/dist dist/*.whl + uv pip install --find-links src/llama_stack_api/dist dist/*.whl - name: Verify Llama Stack package run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6f4dd6a0e..c60440173 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,7 @@ repos: hooks: - id: ruff args: [ --fix ] - exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$ + exclude: ^(src/llama_stack_api/strong_typing/.*)$ - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs diff --git a/pyproject.toml b/pyproject.toml index d287b4be7..34728d6ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"] +include = ["llama_stack", "llama_stack.*", "llama_stack_api", "llama_stack_api.*"] [[tool.uv.index]] name = "pytorch-cpu" @@ -191,7 +191,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] -llama-stack-api = [{ path = "src/llama-stack-api", editable = true }] +llama-stack-api = [{ path = "src/llama_stack_api", editable = true }] [tool.ruff] line-length = 120 @@ -258,7 +258,7 @@ unfixable = [ ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["src", "src/llama-stack-api"] +mypy_path = ["src"] packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] @@ -281,14 +281,12 @@ exclude = [ "^src/llama_stack/core/store/registry\\.py$", "^src/llama_stack/core/utils/exec\\.py$", "^src/llama_stack/core/utils/prompt_for_config\\.py$", - # Moved to llama-stack-api but still excluded "^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/", - "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$", "^src/llama_stack/providers/inline/agents/meta_reference/", "^src/llama_stack/providers/inline/datasetio/localfs/", 
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", @@ -342,9 +340,7 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$", - "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$", - "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$", + "^src/llama_stack_api/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 8099a3f0d..381bbc6a7 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,11 +14,11 @@ import os from pathlib import Path import fire -from llama_stack_api import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model +from llama_stack_api import ModelNotFoundError THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 50fe394fc..82bef1a4f 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,7 +9,6 @@ import sys from pathlib import Path import yaml -from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType @@ -22,6 +21,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger +from llama_stack_api import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index 0a4e22b09..d49b142e0 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,7 +11,6 @@ from functools import lru_cache from pathlib import Path import yaml -from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( @@ -33,6 +32,7 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType +from llama_stack_api import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 27ded7ede..630b2a47f 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,7 +6,6 @@ import sys -from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint @@ -14,6 +13,7 @@ from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger +from llama_stack_api import Api log = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 41acacdb5..ba935a35e 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,10 +12,11 @@ from enum import Enum 
from typing import Any, Union, get_args, get_origin import httpx -from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from termcolor import cprint +from llama_stack_api import RemoteProviderConfig + _CLIENT_CLASSES = {} diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index bdb3b9734..d738b8a61 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,8 +6,6 @@ import textwrap from typing import Any -from llama_stack_api import Api, ProviderSpec - from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, @@ -22,6 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger +from llama_stack_api import Api, ProviderSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index b94cd4fdd..4cf5a82ee 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,6 +8,13 @@ import secrets import time from typing import Any, Literal +from pydantic import BaseModel, TypeAdapter + +from llama_stack.core.datatypes import AccessRule, StackRunConfig +from llama_stack.log import get_logger +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType +from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( Conversation, ConversationDeletedResource, @@ -18,13 +25,6 @@ from llama_stack_api import ( Conversations, Metadata, ) -from pydantic import BaseModel, TypeAdapter - -from llama_stack.core.datatypes import AccessRule, StackRunConfig -from llama_stack.log import get_logger -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_conversations") diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 4231363b6..1e29690ff 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,6 +9,15 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse +from pydantic import BaseModel, Field, field_validator, model_validator + +from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ( + KVStoreReference, + StorageBackendType, + StorageConfig, +) +from llama_stack.log import LoggingConfig from llama_stack_api import ( Api, Benchmark, @@ -35,15 +44,6 @@ from llama_stack_api import ( VectorStore, VectorStoreInput, ) -from pydantic import BaseModel, Field, field_validator, model_validator - -from llama_stack.core.access_control.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ( - KVStoreReference, - StorageBackendType, - StorageConfig, -) -from llama_stack.log import LoggingConfig LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 diff --git a/src/llama_stack/core/distribution.py 
b/src/llama_stack/core/distribution.py index 162f9f2b0..658c75ef2 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,17 +10,17 @@ import os from typing import Any import yaml +from pydantic import BaseModel + +from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.external import load_external_apis +from llama_stack.log import get_logger from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) -from pydantic import BaseModel - -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.external import load_external_apis -from llama_stack.log import get_logger logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index ce0c7eb72..d1a2d6e42 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,10 +6,10 @@ import yaml -from llama_stack_api import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger +from llama_stack_api import Api, ExternalApiSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 53ddd3475..272c9d1bc 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,6 +6,11 @@ from importlib.metadata import version +from pydantic import BaseModel + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.external import load_external_apis +from llama_stack.core.server.routes import get_all_api_routes from llama_stack_api import ( HealthInfo, HealthStatus, @@ -14,11 +19,6 @@ from llama_stack_api import ( RouteInfo, VersionInfo, ) -from pydantic import BaseModel - -from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.external import load_external_apis -from llama_stack.core.server.routes import get_all_api_routes class DistributionInspectConfig(BaseModel): diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 959284720..2a224d915 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse + from llama_stack_api import is_unwrapped_body_param try: diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index d9532b978..9f532c1cd 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,11 +7,11 @@ import json from typing import Any -from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl +from llama_stack_api import ListPromptsResponse, Prompt, Prompts class PromptServiceConfig(BaseModel): diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 7337d9e35..e3fe3c7b3 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,10 +7,10 @@ import asyncio from typing import Any -from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel from llama_stack.log import get_logger +from 
llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
 
 from .datatypes import StackRunConfig
 from .utils.config import redact_sensitive_fields
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index ca154fbc6..6bc32c2d0 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -8,6 +8,19 @@ import importlib.metadata
 import inspect
 from typing import Any
 
+from llama_stack.core.client import get_client_impl
+from llama_stack.core.datatypes import (
+    AccessRule,
+    AutoRoutedProviderSpec,
+    Provider,
+    RoutingTableProviderSpec,
+    StackRunConfig,
+)
+from llama_stack.core.distribution import builtin_automatically_routed_apis
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.store import DistributionRegistry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     LLAMA_STACK_API_V1ALPHA,
     Agents,
@@ -48,20 +61,6 @@ from llama_stack_api import (
     Providers as ProvidersAPI,
 )
 
-from llama_stack.core.client import get_client_impl
-from llama_stack.core.datatypes import (
-    AccessRule,
-    AutoRoutedProviderSpec,
-    Provider,
-    RoutingTableProviderSpec,
-    StackRunConfig,
-)
-from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.store import DistributionRegistry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index c2d051422..289755bcb 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, RoutingTable
-
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
@@ -15,6 +13,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack_api import Api, RoutingTable
 
 
 async def get_routing_table_impl(
diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
index dcf247874..b6a5f3b96 100644
--- a/src/llama_stack/core/routers/datasets.py
+++ b/src/llama_stack/core/routers/datasets.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
-
 from llama_stack.log import get_logger
+from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
index cbbbf5cc5..4d7269180 100644
--- a/src/llama_stack/core/routers/eval_scoring.py
+++ b/src/llama_stack/core/routers/eval_scoring.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     BenchmarkConfig,
     Eval,
@@ -18,8 +19,6 @@ from llama_stack_api import (
     ScoringFnParams,
 )
 
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index 292a7c4bb..719624e86 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -11,6 +11,16 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Body
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
+
+from llama_stack.core.telemetry.telemetry import MetricEvent
+from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
+from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.chat_format import ChatFormat
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
@@ -39,16 +49,6 @@ from llama_stack_api import (
     RerankResponse,
     RoutingTable,
 )
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-
-from llama_stack.core.telemetry.telemetry import MetricEvent
-from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
index f85bbb767..2bc99f14f 100644
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
-
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
+from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
index 984a8e2a7..eccc05732 100644
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@@ -6,14 +6,13 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
     ToolRuntime,
 )
 
-from llama_stack.log import get_logger
-
 from ..routing_tables.toolgroups import ToolGroupsRoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index 47412c07f..5256dda44 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -9,6 +9,9 @@ import uuid
 from typing import Annotated, Any
 
 from fastapi import Body
+
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Chunk,
     HealthResponse,
@@ -38,9 +41,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import VectorStoresConfig
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index 66830bc41..9037ffe8b 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
-
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index cfbafc9a8..a9e3ff95f 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
-
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
 from llama_stack.core.datatypes import (
@@ -20,6 +18,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
+from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index c49c9769b..62fd07b13 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,6 +7,10 @@ import uuid
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    DatasetWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Dataset,
     DatasetNotFoundError,
@@ -20,11 +24,6 @@ from llama_stack_api import (
     URIDataSource,
 )
 
-from llama_stack.core.datatypes import (
-    DatasetWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index e1210a139..1facbb27b 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,6 +7,13 @@ import time
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    ModelWithOwner,
+    RegistryEntrySource,
+)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListModelsResponse,
     Model,
@@ -17,14 +24,6 @@ from llama_stack_api import (
     OpenAIModel,
 )
 
-from llama_stack.core.datatypes import (
-    ModelWithOwner,
-    RegistryEntrySource,
-)
-from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index 66165ac2f..65ed26b85 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,6 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import (
+    ScoringFnWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListScoringFunctionsResponse,
     ParamType,
@@ -13,11 +17,6 @@ from llama_stack_api import (
     ScoringFunctions,
 )
 
-from llama_stack.core.datatypes import (
-    ScoringFnWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index 0f981c49d..97b2efb96 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
-
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index a552cb96e..7e2068608 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,6 +6,8 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -16,9 +18,6 @@ from llama_stack_api import (
     ToolGroups,
 )
 
-from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index f95463b3c..93c119542 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,6 +6,11 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
+
 # Removed VectorStores import to avoid exposing public API
 from llama_stack_api import (
     ModelNotFoundError,
@@ -23,11 +28,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import (
-    VectorStoreWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index a7f5d7916..66942dd39 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,6 @@ from urllib.parse import parse_qs, urljoin, urlparse
 
 import httpx
 import jwt
-from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -23,6 +22,7 @@ from llama_stack.core.datatypes import (
     User,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import TokenValidationError
 
 logger = get_logger(name=__name__, category="core::auth")
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index e7a84937d..af5002565 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,10 +10,10 @@ from collections.abc import Callable
 from typing import Any
 
 from aiohttp import hdrs
-from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route
 
 from llama_stack.core.resolver import api_protocol_map
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 8116348ec..0d3513980 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,7 +28,6 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
@@ -57,6 +56,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 674c35f31..00d990cb1 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,6 +12,28 @@ import tempfile
 from typing import Any
 
 import yaml
+
+from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
+from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageBackendConfig,
+    StorageConfig,
+)
+from llama_stack.core.store.registry import create_dist_registry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Agents,
     Api,
@@ -37,28 +59,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
-from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
-from llama_stack.core.distribution import get_provider_registry
-from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
-from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    ServerStoresConfig,
-    SqliteKVStoreConfig,
-    SqliteSqlStoreConfig,
-    SqlStoreReference,
-    StorageBackendConfig,
-    StorageConfig,
-)
-from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
index 1a56277ea..5268fa641 100644
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ b/src/llama_stack/core/telemetry/telemetry.py
@@ -16,7 +16,6 @@ from typing import (
     cast,
 )
 
-from llama_stack_api import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@@ -29,6 +28,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
+from llama_stack_api import json_schema_type, register_schema
 
 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index fd76e3ccb..52a07b7f1 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -18,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index 67af0e92a..a515794d5 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,8 +6,6 @@
 
 from pathlib import Path
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -23,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index 59deca6d0..1f4dbf2c2 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,6 @@
 # the root directory of this source tree.
 
 
-from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
-
 from llama_stack.core.datatypes import (
     BenchmarkInput,
     BuildProvider,
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
 
 
 def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 1a8126290..4c21a8c99 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,8 +7,6 @@
 
 from typing import Any
 
-from llama_stack_api import RemoteProviderSpec
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
@@ -39,6 +37,7 @@ from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOC
 from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
+from llama_stack_api import RemoteProviderSpec
 
 
 def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index faf5fb085..5755a26de 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,7 +10,6 @@ from typing import Any, Literal
 
 import jinja2
 import rich
 import yaml
-from llama_stack_api import DatasetPurpose, ModelType
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -43,6 +42,7 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 025fcc676..347f6fdb1 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,6 +5,10 @@
 # the root directory of this source tree.
 
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
     Conversations,
@@ -25,11 +29,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
-from llama_stack.providers.utils.responses.responses_store import ResponsesStore
-
 from .config import MetaReferenceAgentsImplConfig
 from .responses.openai_responses import OpenAIResponsesImpl
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 347eeef78..3f88b1562 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,6 +8,13 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
 from llama_stack_api import (
     ConversationItem,
     Conversations,
@@ -34,13 +41,6 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
-)
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 6a791e92d..ea4486b62 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,6 +8,9 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
@@ -65,10 +68,6 @@ from llama_stack_api import (
     WebSearchToolTypes,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-
 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
     convert_chat_choice_to_response_message,
@@ -1022,11 +1021,11 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api import ToolDef
         from openai.types.chat import ChatCompletionToolParam
 
         from llama_stack.models.llama.datatypes import ToolDefinition
         from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef
 
         def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
             tool_def = ToolDefinition(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 38fb2a94f..616ec2477 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,6 +9,8 @@ import json
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ImageContentItem,
     OpenAIChatCompletionContentPartImageParam,
@@ -37,9 +39,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-
 from .types import ChatCompletionContext, ToolExecutionResult
 
 logger = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 35ad03378..f6efcee22 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,6 +7,9 @@
 from dataclasses import dataclass
 from typing import cast
 
+from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel
+
 from llama_stack_api import (
     OpenAIChatCompletionToolCall,
     OpenAIMessageParam,
@@ -26,8 +29,6 @@ from llama_stack_api import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from openai.types.chat import ChatCompletionToolParam
-from pydantic import BaseModel
 
 
 class ToolExecutionResult(BaseModel):
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index dd90ac298..bfb557a99 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,10 +6,9 @@
 
 import asyncio
 
-from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
-
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 
 log = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 27d0f4213..11c4b06a9 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import Files, Inference, Models
-
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
 
 from .batches import ReferenceBatchesImpl
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index f0f8da96c..73727799d 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,6 +13,11 @@ import uuid
 from io import BytesIO
 from typing import Any, Literal
 
+from openai.types.batch import BatchError, Errors
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack_api import (
     Batches,
     BatchObject,
@@ -33,11 +38,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     ResourceNotFoundError,
 )
-from openai.types.batch import BatchError, Errors
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
 
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 1fcfbbef4..6ab1a540f 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,11 +5,10 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import LocalFSDatasetIOConfig
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index e6020e8a3..d43e569e2 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,6 +6,10 @@ import json
 
 from typing import Any
 
+from tqdm import tqdm
+
+from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -24,10 +28,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     Scoring,
 )
-from tqdm import tqdm
-
-from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-from llama_stack.providers.utils.kvstore import kvstore_impl
 
 from .config import MetaReferenceEvalConfig
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 5e8c887f1..5fb35a378 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,6 +10,14 @@ from pathlib import Path
 from typing import Annotated
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -21,14 +29,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import LocalfsFilesImplConfig
 
 logger = get_logger(name=__name__, category="files")
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index 802e79f15..ec6e8bfe8 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import QuantizationConfig
 from pydantic import BaseModel, field_validator
 
 from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig
 
 
 class MetaReferenceInferenceConfig(BaseModel):
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index 2155a1ae8..6781d0af9 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,6 +8,14 @@ import math
 from typing import Optional
 
 import torch
+from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
+
+from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
+from llama_stack.models.llama.llama3.generation import Llama3
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
+from llama_stack.models.llama.llama4.generation import Llama4
+from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
+from llama_stack.models.llama.sku_types import Model, ModelFamily
 from llama_stack_api import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
@@ -18,14 +26,6 @@ from llama_stack_api import (
     SamplingParams,
     TopPSamplingStrategy,
 )
-from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
-
-from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
-from llama_stack.models.llama.llama3.generation import Llama3
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.generation import Llama4
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
-from llama_stack.models.llama.sku_types import Model, ModelFamily
 
 from .common import model_checkpoint_dir
 from .config import MetaReferenceInferenceConfig
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 753185fe7..42d1299ab 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,23 +9,6 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
-from llama_stack_api import (
-    InferenceProvider,
-    Model,
-    ModelsProtocolPrivate,
-    ModelType,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionUsage,
-    OpenAIChoice,
-    OpenAICompletion,
-    OpenAICompletionRequestWithExtraBody,
-    OpenAIUserMessageParam,
-    ToolChoice,
-)
-
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
@@ -48,6 +31,22 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
     build_hf_repo_model_entry,
 )
+from llama_stack_api import (
+    InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionUsage,
+    OpenAIChoice,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+    ToolChoice,
+)
 
 from .config import MetaReferenceInferenceConfig
 from .generators import LlamaGenerator
@@ -441,6 +440,8 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
         from llama_stack_api import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
@@ -449,9 +450,6 @@ class MetaReferenceInferenceImpl(
             OpenAIChunkChoice,
         )
 
-        from llama_stack.models.llama.datatypes import StopReason
-        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
-
         response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
         created = int(time.time())
         generated_text = ""
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 14c9a41a4..b5cadeec2 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,6 +6,10 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.embedding_mixin import (
+    SentenceTransformerEmbeddingMixin,
+)
 from llama_stack_api import (
     InferenceProvider,
     Model,
@@ -18,11 +22,6 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.embedding_mixin import (
-    SentenceTransformerEmbeddingMixin,
-)
-
 from .config import SentenceTransformersInferenceConfig
 
 log = get_logger(name=__name__, category="inference")
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 7a85d0e03..cc018c865 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,11 +12,10 @@
 
 from typing import Any
 
-from llama_stack_api import ChatCompletionInputType, DialogType, StringType
-
 from llama_stack.providers.utils.common.data_schema_validator import (
     ColumnName,
 )
+from llama_stack_api import ChatCompletionInputType, DialogType, StringType
 
 EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {
     "instruct": [
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index f3f3d8d56..fa939d439 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,6 +6,11 @@ from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.huggingface.config import (
+    HuggingFacePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -20,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.huggingface.config import (
-    HuggingFacePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 58a30618c..c7c737fbd 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,14 +12,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    TrainingConfig,
-)
 from peft import LoraConfig
 from transformers import (
     AutoTokenizer,
@@ -28,6 +20,14 @@ from trl import SFTConfig, SFTTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index f7dc3ebf2..da2626555 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,13 +11,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DatasetIO,
-    Datasets,
-    DPOAlignmentConfig,
-    TrainingConfig,
-)
 from transformers import (
     AutoTokenizer,
 )
@@ -25,6 +18,13 @@ from trl import DPOConfig, DPOTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DatasetIO,
+    Datasets,
+    DPOAlignmentConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 86c3c3f52..2037f70e7 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,9 +14,10 @@ from typing import TYPE_CHECKING, Any, Protocol
 
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 
+from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index 1483b8385..f929ea4dd 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,6 @@ from collections.abc import Callable
 
 import torch
 
-from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
@@ -24,6 +23,7 @@ from torchtune.modules.transforms import Transform
 
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import Model
+from llama_stack_api import DatasetFormat
 
 BuildLoraModelCallable = Callable[..., torch.nn.Module]
 BuildTokenizerCallable = Callable[..., Llama3Tokenizer]
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 3370d42fa..515ff7b66 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,6 +6,11 @@ from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.torchtune.config import (
+    TorchtunePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -21,12 +26,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.torchtune.config import (
-    TorchtunePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 2bf1d0fe7..f5e5db415 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,17 +12,6 @@ from pathlib import Path
 from typing import Any
 
 import torch
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    PostTrainingMetric,
-    QATFinetuningConfig,
-    TrainingConfig,
-)
 from torch import nn
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader, DistributedSampler
@@ -56,6 +45,17 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
     TorchtunePostTrainingConfig,
 )
 from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingMetric,
+    QATFinetuningConfig,
+    TrainingConfig,
+)
 
 log = get_logger(name=__name__, category="post_training")
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 80e907c10..071fbe2dc 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,6 +10,10 @@ from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ModerationObject,
     ModerationObjectResults,
@@ -21,11 +25,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import CodeScannerConfig
 
 log = get_logger(name=__name__, category="safety")
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 36e4280b9..ff1536bea 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,6 +9,13 @@ import uuid
 from string import Template
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import Role
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ImageContentItem,
     Inference,
@@ -26,14 +33,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import Role
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import LlamaGuardConfig
 
 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index b4f495f19..51383da1b 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,6 +7,11 @@
 from typing import Any
 
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,11 +23,6 @@ from llama_stack_api import (
     ShieldStore,
     ViolationLevel,
 )
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-from llama_stack.core.utils.model_utils import model_local_dir
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 
 from .config import PromptGuardConfig, PromptGuardType
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index 326fd9211..cf5cb79ba 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -17,12 +22,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import BasicScoringConfig
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index 93c2627dd..e48bab8fa 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,9 +8,8 @@ import json
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 382c64d88..2e79240be 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 4ec85bb09..33b1c5a31 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.ifeval import (
     ifeval,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 4e9d49e96..1f4f2f979 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
 from .fn_defs.regex_parser_math_response import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 7f213b38c..1cc74f874 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from .fn_defs.regex_parser_multiple_choice_answer import (
     regex_parser_multiple_choice_answer,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index b291924d5..fe15a4972 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index cbab93c74..cfa35547b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,6 +17,16 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+    validate_row_schema,
+)
+from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -29,16 +39,6 @@ from llama_stack_api import (
     ScoringResult,
     ScoringResultRow,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import Api
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-    validate_row_schema,
-)
-from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 
 from .config import BraintrustScoringConfig
 from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index aa636d2b3..23e6ad705 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -18,12 +23,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import LlmAsJudgeScoringConfig
 from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index 169a4d8b7..73ce82cda 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
 
 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
 from .fn_defs.llm_as_judge_base import llm_as_judge_base
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index f499989cb..240df199b 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,6 +6,10 @@
 
 from jinja2 import Template
+
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
     InterleavedContent,
@@ -16,10 +20,6 @@ from llama_stack_api import (
     RAGQueryGeneratorConfig,
 )
 
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 
 async def generate_rag_query(
     config: RAGQueryGeneratorConfig,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index aacb7bb38..895d219bb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,6 +12,11 @@ from typing import Any
 
 import httpx
 from fastapi import UploadFile
+from pydantic import TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 from llama_stack_api import (
     URL,
     Files,
@@ -34,11 +39,6 @@ from llama_stack_api import (
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index d955b1d06..3897991f5 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd433f818..d516d9fe9 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index abef42499..d52a54e6a 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,6 +12,13 @@ from typing import Any
 
 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from numpy.typing import NDArray
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 from llama_stack_api import (
     Chunk,
     Files,
@@ -25,13 +32,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
     VectorStoresProtocolPrivate,
 )
-from numpy.typing import NDArray
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index 08d05c991..14ddd2362 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index 437d643f0..4251f2f39 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index e979ff323..74bc349a5 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,16 +12,6 @@ from typing import Any
 
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
-from llama_stack_api import (
-    Chunk,
-    Files,
-    Inference,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStore,
-    VectorStoreNotFoundError,
-    VectorStoresProtocolPrivate,
-)
 from numpy.typing import NDArray
 
 from llama_stack.log import get_logger
@@ -35,6 +25,16 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 
 logger = get_logger(name=__name__, category="vector_io")
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index bd204cecd..455be1ae7 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,14 +5,13 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.utils.kvstore import kvstore_dependencies
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
 
-from llama_stack.providers.utils.kvstore import kvstore_dependencies
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index dfc527816..024254b57 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
-
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 3f0a83a30..d34312353 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
+from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS from llama_stack_api import ( Api, InlineProviderSpec, @@ -12,8 +13,6 @@ from llama_stack_api import ( RemoteProviderSpec, ) -from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS - def available_providers() -> list[ProviderSpec]: return [ diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 1260ce644..72069f716 100644 --- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -6,10 +6,9 @@ from typing import Any from urllib.parse import parse_qs, urlparse -from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse - from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse from .config import HuggingfaceDatasetIOConfig diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index cb674b0d7..2f5548fa9 100644 --- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -7,6 +7,7 @@ from typing import Any import aiohttp + from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType from .config import NvidiaDatasetIOConfig diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py index fbdec0d4d..5802cb098 100644 --- a/src/llama_stack/providers/remote/eval/nvidia/eval.py +++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py @@ -6,6 +6,8 @@ from typing import Any import requests + +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack_api import ( Agents, Benchmark, @@ -22,8 +24,6 @@ from llama_stack_api import ( ScoringResult, ) -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - from .config import NVIDIAEvalConfig DEFAULT_NAMESPACE = "nvidia" diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py index bbd630977..d2f5a08eb 100644 --- a/src/llama_stack/providers/remote/files/openai/files.py +++ b/src/llama_stack/providers/remote/files/openai/files.py @@ -8,6 +8,12 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Depends, File, Form, Response, UploadFile + +from llama_stack.core.datatypes import AccessRule +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType +from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( ExpiresAfter, Files, @@ -18,12 +24,6 @@ from llama_stack_api import ( Order, ResourceNotFoundError, ) - -from llama_stack.core.datatypes import AccessRule -from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from 
llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from openai import OpenAI from .config import OpenAIFilesImplConfig diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py index 14f1e3852..68822eb77 100644 --- a/src/llama_stack/providers/remote/files/s3/files.py +++ b/src/llama_stack/providers/remote/files/s3/files.py @@ -17,6 +17,12 @@ from fastapi import Depends, File, Form, Response, UploadFile if TYPE_CHECKING: from mypy_boto3_s3.client import S3Client +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.id_generation import generate_object_id +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType +from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( ExpiresAfter, Files, @@ -28,13 +34,6 @@ from llama_stack_api import ( ResourceNotFoundError, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.id_generation import generate_object_id -from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl - from .config import S3FilesImplConfig # TODO: provider data for S3 credentials diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py index 7ee4c54e2..b706b90e1 100644 --- a/src/llama_stack/providers/remote/inference/anthropic/config.py +++ b/src/llama_stack/providers/remote/inference/anthropic/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class AnthropicProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py index 596f6c234..b801b91b2 100644 --- a/src/llama_stack/providers/remote/inference/azure/config.py +++ b/src/llama_stack/providers/remote/inference/azure/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class AzureProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 1a9fe533b..70ee95916 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -6,6 +6,11 @@ from collections.abc import AsyncIterator, Iterable +from openai import AuthenticationError + +from llama_stack.core.telemetry.tracing import get_current_span +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIChatCompletion, 
OpenAIChatCompletionChunk, @@ -15,11 +20,6 @@ from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from openai import AuthenticationError - -from llama_stack.core.telemetry.tracing import get_current_span -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import BedrockConfig diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py index c7f3111f9..680431e22 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -6,13 +6,12 @@ from urllib.parse import urljoin +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from .config import CerebrasImplConfig diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py index a1fd41e2d..db357fd1c 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/config.py +++ b/src/llama_stack/providers/remote/inference/cerebras/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type DEFAULT_BASE_URL = "https://api.cerebras.ai" diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py index 4974593d2..bd409fa13 100644 --- a/src/llama_stack/providers/remote/inference/databricks/config.py +++ b/src/llama_stack/providers/remote/inference/databricks/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class DatabricksProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py index 8b802379f..c07d97b67 100644 --- a/src/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py @@ -7,10 +7,10 @@ from collections.abc import Iterable from databricks.sdk import WorkspaceClient -from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody from .config import DatabricksImplConfig diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py index d786655eb..e36c76054 100644 --- a/src/llama_stack/providers/remote/inference/fireworks/config.py +++ b/src/llama_stack/providers/remote/inference/fireworks/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import Field from 
llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py index 6c25c005c..46cec7d0d 100644 --- a/src/llama_stack/providers/remote/inference/gemini/config.py +++ b/src/llama_stack/providers/remote/inference/gemini/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class GeminiProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py index 79d694f06..f6f48cc2b 100644 --- a/src/llama_stack/providers/remote/inference/gemini/gemini.py +++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py @@ -6,6 +6,7 @@ from typing import Any +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, @@ -13,8 +14,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from .config import GeminiConfig diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py index cec327716..cca53a4e8 100644 --- a/src/llama_stack/providers/remote/inference/groq/config.py +++ b/src/llama_stack/providers/remote/inference/groq/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class GroqProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py index c16311830..ded210d89 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class LlamaProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 1dea3e3cb..a5f67ecd1 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -4,6 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, @@ -11,10 +14,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, ) -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - logger = get_logger(name=__name__, category="inference::llama_openai_compat") diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py index 6ff98d290..e5b0c6b73 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/config.py +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class NVIDIAProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py index 9e4c6f559..17f8775bf 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -8,6 +8,9 @@ from collections.abc import Iterable import aiohttp + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( Model, ModelType, @@ -17,9 +20,6 @@ from llama_stack_api import ( RerankResponse, ) -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from . 
import NVIDIAConfig from .utils import _is_nvidia_hosted diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py index 24b4ad926..93cc36d76 100644 --- a/src/llama_stack/providers/remote/inference/oci/config.py +++ b/src/llama_stack/providers/remote/inference/oci/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class OCIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py index 36e56cf6c..239443963 100644 --- a/src/llama_stack/providers/remote/inference/oci/oci.py +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -10,11 +10,6 @@ from typing import Any import httpx import oci -from llama_stack_api import ( - ModelType, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) from oci.generative_ai.generative_ai_client import GenerativeAiClient from oci.generative_ai.models import ModelCollection from openai._base_client import DefaultAsyncHttpxClient @@ -23,6 +18,11 @@ from llama_stack.log import get_logger from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth from llama_stack.providers.remote.inference.oci.config import OCIConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + ModelType, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) logger = get_logger(name=__name__, category="inference::oci") diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py index 6a471429e..d1bf85361 100644 --- a/src/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py @@ -7,17 +7,17 @@ import asyncio +from ollama import AsyncClient as AsyncOllamaClient + +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, HealthStatus, Model, UnsupportedModelError, ) -from ollama import AsyncClient as AsyncOllamaClient - -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin logger = get_logger(name=__name__, category="inference::ollama") diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py index cbb01b2d0..ab28e571f 100644 --- a/src/llama_stack/providers/remote/inference/openai/config.py +++ b/src/llama_stack/providers/remote/inference/openai/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class OpenAIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index 
7045dbf2e..54508b6fb 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 19cf0c5d7..75eedf026 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -6,6 +6,9 @@ from collections.abc import AsyncIterator +from openai import AsyncOpenAI + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( Inference, Model, @@ -17,9 +20,6 @@ from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from openai import AsyncOpenAI - -from llama_stack.core.request_headers import NeedsRequestProviderData from .config import PassthroughImplConfig diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py index aaa4230a8..2ee56ca94 100644 --- a/src/llama_stack/providers/remote/inference/runpod/config.py +++ b/src/llama_stack/providers/remote/inference/runpod/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class RunpodProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py index 4596b2df5..9c770cc24 100644 --- a/src/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py @@ -6,14 +6,13 @@ from collections.abc import AsyncIterator +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from .config import RunpodImplConfig diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py index 6d72e7205..93679ba99 100644 --- a/src/llama_stack/providers/remote/inference/sambanova/config.py +++ b/src/llama_stack/providers/remote/inference/sambanova/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class SambaNovaProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py index 051a2afa3..74edc8523 100644 --- a/src/llama_stack/providers/remote/inference/tgi/config.py +++ b/src/llama_stack/providers/remote/inference/tgi/config.py @@ -5,10 +5,10 @@ # the root directory of this source tree. 
-from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 831a26e39..dd47ccc62 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -8,14 +8,14 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi -from llama_stack_api import ( - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) from pydantic import SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py index 96c0538e3..c1b3c4a55 100644 --- a/src/llama_stack/providers/remote/inference/together/config.py +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py index f1355a760..cd34aec5e 100644 --- a/src/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -8,18 +8,18 @@ from collections.abc import Iterable from typing import Any, cast -from llama_stack_api import ( - Model, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, -) from together import AsyncTogether # type: ignore[import-untyped] from together.constants import BASE_URL # type: ignore[import-untyped] from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + Model, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, +) from .config import TogetherImplConfig diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py index 53e2b3e65..5891f7cd0 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/config.py +++ b/src/llama_stack/providers/remote/inference/vertexai/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class VertexAIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py index 23f713961..c43533ee4 100644 --- a/src/llama_stack/providers/remote/inference/vllm/config.py 
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,10 +6,10 @@ from pathlib import Path -from llama_stack_api import json_schema_type from pydantic import Field, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index f7938c22c..1510e9384 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,6 +7,10 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx +from pydantic import ConfigDict + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, HealthStatus, @@ -15,10 +19,6 @@ from llama_stack_api import ( OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) -from pydantic import ConfigDict - -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import VLLMInferenceAdapterConfig diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index 1bba040ef..914f80820 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class WatsonXProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index de23c25d7..aab9e2dca 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -9,6 +9,12 @@ from typing import Any import litellm import requests + +from llama_stack.core.telemetry.tracing import get_current_span +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params from llama_stack_api import ( Model, ModelType, @@ -22,12 +28,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, ) -from llama_stack.core.telemetry.tracing import get_current_span -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig -from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params - logger = get_logger(name=__name__, category="providers::remote::watsonx") @@ -238,9 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack_api import OpenAIEmbeddingUsage - from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response + from llama_stack_api import OpenAIEmbeddingUsage 
data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index 02c35241b..830a9f747 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,6 +8,11 @@ from datetime import datetime from typing import Any, Literal import aiohttp +from pydantic import BaseModel, ConfigDict + +from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig +from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, @@ -17,11 +22,6 @@ from llama_stack_api import ( PostTrainingJobStatusResponse, TrainingConfig, ) -from pydantic import BaseModel, ConfigDict - -from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig -from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from .models import _MODEL_ENTRIES diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index 78762155d..bd40dacb4 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,11 +7,11 @@ import warnings from typing import Any -from llama_stack_api import TrainingConfig from pydantic import BaseModel from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig +from llama_stack_api import TrainingConfig from .config import NvidiaPostTrainingConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 86b93c32e..c321f759b 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,6 +7,8 @@ import json from typing import Any +from llama_stack.log import get_logger +from llama_stack.providers.utils.bedrock.client import create_bedrock_client from llama_stack_api import ( OpenAIMessageParam, RunShieldResponse, @@ -17,9 +19,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.log import get_logger -from llama_stack.providers.utils.bedrock.client import create_bedrock_client - from .config import BedrockSafetyConfig logger = get_logger(name=__name__, category="safety::bedrock") diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index ca28924d4..0b1f2581a 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,9 +5,8 @@ # the root directory of this source tree. 
-from llama_stack_api import json_schema_type - from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index fc686ae73..f11de5feb 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,9 +6,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field +from llama_stack_api import json_schema_type + @json_schema_type class NVIDIASafetyConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index b3b5090e0..43ff45cc9 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,6 +7,8 @@ from typing import Any import requests + +from llama_stack.log import get_logger from llama_stack_api import ( ModerationObject, OpenAIMessageParam, @@ -18,8 +20,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.log import get_logger - from .config import NVIDIASafetyConfig logger = get_logger(name=__name__, category="safety::nvidia") diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index a8e745851..bfb42d88a 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,9 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr +from llama_stack_api import json_schema_type + class SambaNovaProviderDataValidator(BaseModel): sambanova_api_key: str | None = Field( diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 119ebb6ed..c11cb544d 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,6 +8,9 @@ from typing import Any import litellm import requests + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger from llama_stack_api import ( OpenAIMessageParam, RunShieldResponse, @@ -18,9 +21,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger - from .config import SambaNovaSafetyConfig logger = get_logger(name=__name__, category="safety::sambanova") diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 84e47dd4f..a5a53a9eb 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import BingSearchToolConfig diff --git 
a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index b7eee776a..4888730e4 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,6 +7,9 @@ from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.models.llama.datatypes import BuiltinTool from llama_stack_api import ( URL, ListToolDefsResponse, @@ -17,9 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.models.llama.datatypes import BuiltinTool - from .config import BraveSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index efb1eb2df..544597a51 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,6 +7,9 @@ from typing import Any from urllib.parse import urlparse +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools from llama_stack_api import ( URL, Api, @@ -17,10 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools - from .config import MCPProviderConfig logger = get_logger(__name__, category="tools") diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d65d66e67..d86cf5d8e 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import TavilySearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 9cc865092..f8d806a5c 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import WolframAlphaToolConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 
eca5d349b..645b40661 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,6 +9,14 @@ from typing import Any from urllib.parse import urlparse import chromadb +from numpy.typing import NDArray + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -19,14 +27,6 @@ from llama_stack_api import ( VectorStore, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index b1e4f9a4a..648d641ad 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 2e2c788c7..4b9d6a566 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index b856bf918..aefa20317 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,17 +8,6 @@ import asyncio import os from typing import Any -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker @@ -34,6 +23,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( 
+ Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index aeb1c83bb..87d40a883 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 8aa0303b6..2901bad97 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,17 +8,6 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values @@ -31,6 +20,17 @@ from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 8cc4cbb2b..e0a3fe207 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 53d6be2b6..20ab653d0 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,6 +9,15 @@ import hashlib import uuid from typing import Any +from numpy.typing import NDArray +from qdrant_client import AsyncQdrantClient, models +from qdrant_client.models import PointStruct + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import 
ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 from llama_stack_api import (
     Chunk,
     Files,
@@ -22,15 +31,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
     VectorStoresProtocolPrivate,
 )
-from numpy.typing import NDArray
-from qdrant_client import AsyncQdrantClient, models
-from qdrant_client.models import PointStruct
-
-from llama_stack.log import get_logger
-from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 
 from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
index 19f9679fb..75d1b7c51 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index c72666f63..ba3e6b7ea 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -8,17 +8,6 @@ from typing import Any
 
 import weaviate
 import weaviate.classes as wvc
-from llama_stack_api import (
-    Chunk,
-    Files,
-    Inference,
-    InterleavedContent,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStore,
-    VectorStoreNotFoundError,
-    VectorStoresProtocolPrivate,
-)
 from numpy.typing import NDArray
 from weaviate.classes.init import Auth
 from weaviate.classes.query import Filter, HybridFusion
@@ -35,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 
 from .config import WeaviateVectorIOConfig
diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py
index 7ef245779..c9a3b0920 100644
--- a/src/llama_stack/providers/utils/common/data_schema_validator.py
+++ b/src/llama_stack/providers/utils/common/data_schema_validator.py
@@ -7,9 +7,8 @@
 from enum import Enum
 from typing import Any
 
-from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
-
 from llama_stack.core.datatypes import Api
+from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
 
 
 class ColumnName(Enum):
diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py
index 21afbec2b..3fac14f38 100644
--- a/src/llama_stack/providers/utils/files/form_data.py
+++ b/src/llama_stack/providers/utils/files/form_data.py
@@ -7,9 +7,10 @@
 import json
 
 from fastapi import Request
-from llama_stack_api import ExpiresAfter
 from pydantic import BaseModel, ValidationError
 
+from llama_stack_api import ExpiresAfter
+
 
 async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None:
     """
diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py
index 3c707dd01..49e3af7a1 100644
--- a/src/llama_stack/providers/utils/inference/inference_store.py
+++ b/src/llama_stack/providers/utils/inference/inference_store.py
@@ -6,6 +6,11 @@
 import asyncio
 from typing import Any
 
+from sqlalchemy.exc import IntegrityError
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListOpenAIChatCompletionResponse,
     OpenAIChatCompletion,
@@ -13,11 +18,6 @@ from llama_stack_api import (
     OpenAIMessageParam,
     Order,
 )
-from sqlalchemy.exc import IntegrityError
-
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
-from llama_stack.log import get_logger
 
 from ..sqlstore.api import ColumnDefinition, ColumnType
 from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index 4f468725b..c462d1aad 100644
--- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -9,6 +9,13 @@ import struct
 from collections.abc import AsyncIterator
 
 import litellm
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack.providers.utils.inference.openai_compat import (
+    prepare_openai_completion_params,
+)
 from llama_stack_api import (
     InferenceProvider,
     OpenAIChatCompletion,
@@ -22,13 +29,6 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
-from llama_stack.providers.utils.inference.openai_compat import (
-    prepare_openai_completion_params,
-)
-
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py
index e7ca5ab74..42b54497f 100644
--- a/src/llama_stack/providers/utils/inference/model_registry.py
+++ b/src/llama_stack/providers/utils/inference/model_registry.py
@@ -6,13 +6,13 @@
 
 from typing import Any
 
-from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
+from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py
index c97e42274..32d41ffde 100644
--- a/src/llama_stack/providers/utils/inference/openai_compat.py
+++ b/src/llama_stack/providers/utils/inference/openai_compat.py
@@ -20,18 +20,6 @@ except ImportError:
     from openai.types.chat.chat_completion_message_tool_call import (
         ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
     )
-from llama_stack_api import (
-    URL,
-    GreedySamplingStrategy,
-    ImageContentItem,
-    JsonSchemaResponseFormat,
-    OpenAIResponseFormatParam,
-    SamplingParams,
-    TextContentItem,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    _URLOrData,
-)
 from openai.types.chat import (
     ChatCompletionMessageToolCall,
 )
@@ -44,6 +32,18 @@ from llama_stack.models.llama.datatypes import (
     ToolCall,
     ToolDefinition,
 )
+from llama_stack_api import (
+    URL,
+    GreedySamplingStrategy,
+    ImageContentItem,
+    JsonSchemaResponseFormat,
+    OpenAIResponseFormatParam,
+    SamplingParams,
+    TextContentItem,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    _URLOrData,
+)
 
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
index c05873df5..559ac90ce 100644
--- a/src/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/openai_mixin.py
@@ -10,6 +10,14 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any
 
+from openai import AsyncOpenAI
+from pydantic import BaseModel, ConfigDict
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
 from llama_stack_api import (
     Model,
     ModelType,
@@ -24,14 +32,6 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
 )
-from openai import AsyncOpenAI
-from pydantic import BaseModel, ConfigDict
-
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
 
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py
index ea01a34e9..6272c9eed 100644
--- a/src/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -12,24 +12,6 @@ import re
 from typing import Any
 
 import httpx
-from llama_stack_api import (
-    CompletionRequest,
-    ImageContentItem,
-    InterleavedContent,
-    InterleavedContentItem,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIFile,
-    OpenAIMessageParam,
-    OpenAISystemMessageParam,
-    OpenAIToolMessageParam,
-    OpenAIUserMessageParam,
-    ResponseFormat,
-    ResponseFormatType,
-    TextContentItem,
-    ToolChoice,
-)
 from PIL import Image as PIL_Image
 
 from llama_stack.log import get_logger
@@ -48,6 +30,24 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
+from llama_stack_api import (
+    CompletionRequest,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIFile,
+    OpenAIMessageParam,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseFormat,
+    ResponseFormatType,
+    TextContentItem,
+    ToolChoice,
+)
 
 log = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
index 895268a4f..0f8fa0a95 100644
--- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py
+++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack_api import json_schema_type
+
 
 @json_schema_type
 class SqliteControlPlaneConfig(BaseModel):
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 68d1c11e5..540ff5940 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -13,6 +13,16 @@ from abc import ABC, abstractmethod
 from typing import Annotated, Any
 
 from fastapi import Body
+from pydantic import TypeAdapter
+
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.vector_store import (
+    ChunkForDeletion,
+    content_from_data_and_mime_type,
+    make_overlapped_chunks,
+)
 from llama_stack_api import (
     Chunk,
     Files,
@@ -43,16 +53,6 @@ from llama_stack_api import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    content_from_data_and_mime_type,
-    make_overlapped_chunks,
-)
 
 EMBEDDING_DIMENSION = 768
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index 37ac79039..b6a671ddb 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -14,6 +14,15 @@ from urllib.parse import unquote
 
 import httpx
 import numpy as np
+from numpy.typing import NDArray
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack_api import (
     URL,
     Api,
@@ -25,15 +34,6 @@ from llama_stack_api import (
     RAGDocument,
     VectorStore,
 )
-from numpy.typing import NDArray
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 
 log = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py
index c7dfed15a..f6e7c435d 100644
--- a/src/llama_stack/providers/utils/responses/responses_store.py
+++ b/src/llama_stack/providers/utils/responses/responses_store.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
@@ -15,10 +18,6 @@ from llama_stack_api import (
     Order,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
 from ..sqlstore.api import ColumnDefinition, ColumnType
 from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
 from ..sqlstore.sqlstore import sqlstore_impl
diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
index d16c75263..f372db8b5 100644
--- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py
index 033a00edc..708fc7095 100644
--- a/src/llama_stack/providers/utils/sqlstore/api.py
+++ b/src/llama_stack/providers/utils/sqlstore/api.py
@@ -8,9 +8,10 @@ from collections.abc import Mapping, Sequence
 from enum import Enum
 from typing import Any, Literal, Protocol
 
-from llama_stack_api import PaginatedResponse
 from pydantic import BaseModel
 
+from llama_stack_api import PaginatedResponse
+
 
 class ColumnType(Enum):
     INTEGER = "INTEGER"
diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
index 263f5e69f..10009d396 100644
--- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
+++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
@@ -6,7 +6,6 @@
 from collections.abc import Mapping, Sequence
 from typing import Any, Literal, cast
 
-from llama_stack_api import PaginatedResponse
 from sqlalchemy import (
     JSON,
     Boolean,
@@ -29,6 +28,7 @@ from sqlalchemy.sql.elements import ColumnElement
 
 from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig
 from llama_stack.log import get_logger
+from llama_stack_api import PaginatedResponse
 
 from .api import ColumnDefinition, ColumnType, SqlStore
diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
index 82c85f46c..fad1bf0f0 100644
--- a/src/llama_stack/providers/utils/tools/mcp.py
+++ b/src/llama_stack/providers/utils/tools/mcp.py
@@ -10,6 +10,14 @@ from enum import Enum
 from typing import Any, cast
 
 import httpx
+from mcp import ClientSession, McpError
+from mcp import types as mcp_types
+from mcp.client.sse import sse_client
+from mcp.client.streamable_http import streamablehttp_client
+
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 from llama_stack_api import (
     ImageContentItem,
     InterleavedContentItem,
@@ -19,14 +27,6 @@ from llama_stack_api import (
     ToolInvocationResult,
     _URLOrData,
 )
-from mcp import ClientSession, McpError
-from mcp import types as mcp_types
-from mcp.client.sse import sse_client
-from mcp.client.streamable_http import streamablehttp_client
-
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
diff --git a/src/llama-stack-api/README.md b/src/llama_stack_api/README.md
similarity index 98%
rename from src/llama-stack-api/README.md
rename to src/llama_stack_api/README.md
index aa6b05722..9bf1d2726 100644
--- a/src/llama-stack-api/README.md
+++ b/src/llama_stack_api/README.md
@@ -53,7 +53,7 @@ This package follows semantic versioning independently from the main `llama-stac
 - **Minor versions** (0.x.0): New APIs, backward-compatible changes
 - **Major versions** (x.0.0): Breaking changes to existing APIs
 
-Current version: **0.1.0**
+Current version: **0.4.0.dev0**
 
 ## Usage Example
diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py
similarity index 99%
rename from src/llama-stack-api/llama_stack_api/__init__.py
rename to src/llama_stack_api/__init__.py
index 8bbe9f8bd..19b29301b 100644
--- a/src/llama-stack-api/llama_stack_api/__init__.py
+++ b/src/llama_stack_api/__init__.py
@@ -19,7 +19,7 @@ Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT sup
 and considered a code smell. All exported symbols are explicitly listed in __all__.
 """
 
-__version__ = "0.4.0"
+__version__ = "0.4.0.dev0"
 
 # Import submodules for those who need them
 from . import common, strong_typing  # noqa: F401
diff --git a/src/llama-stack-api/llama_stack_api/agents.py b/src/llama_stack_api/agents.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/agents.py
rename to src/llama_stack_api/agents.py
diff --git a/src/llama-stack-api/llama_stack_api/batches.py b/src/llama_stack_api/batches.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/batches.py
rename to src/llama_stack_api/batches.py
diff --git a/src/llama-stack-api/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/benchmarks.py
rename to src/llama_stack_api/benchmarks.py
diff --git a/src/llama-stack-api/llama_stack_api/common/__init__.py b/src/llama_stack_api/common/__init__.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/__init__.py
rename to src/llama_stack_api/common/__init__.py
diff --git a/src/llama-stack-api/llama_stack_api/common/content_types.py b/src/llama_stack_api/common/content_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/content_types.py
rename to src/llama_stack_api/common/content_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/errors.py b/src/llama_stack_api/common/errors.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/errors.py
rename to src/llama_stack_api/common/errors.py
diff --git a/src/llama-stack-api/llama_stack_api/common/job_types.py b/src/llama_stack_api/common/job_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/job_types.py
rename to src/llama_stack_api/common/job_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/responses.py b/src/llama_stack_api/common/responses.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/responses.py
rename to src/llama_stack_api/common/responses.py
diff --git a/src/llama-stack-api/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/tracing.py
rename to src/llama_stack_api/common/tracing.py
diff --git a/src/llama-stack-api/llama_stack_api/common/training_types.py b/src/llama_stack_api/common/training_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/training_types.py
rename to src/llama_stack_api/common/training_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/type_system.py b/src/llama_stack_api/common/type_system.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/type_system.py
rename to src/llama_stack_api/common/type_system.py
diff --git a/src/llama-stack-api/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/conversations.py
rename to src/llama_stack_api/conversations.py
diff --git a/src/llama-stack-api/llama_stack_api/datasetio.py b/src/llama_stack_api/datasetio.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datasetio.py
rename to src/llama_stack_api/datasetio.py
diff --git a/src/llama-stack-api/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datasets.py
rename to src/llama_stack_api/datasets.py
diff --git a/src/llama-stack-api/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datatypes.py
rename to src/llama_stack_api/datatypes.py
diff --git a/src/llama-stack-api/llama_stack_api/eval.py b/src/llama_stack_api/eval.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/eval.py
rename to src/llama_stack_api/eval.py
diff --git a/src/llama-stack-api/llama_stack_api/files.py b/src/llama_stack_api/files.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/files.py
rename to src/llama_stack_api/files.py
diff --git a/src/llama-stack-api/llama_stack_api/inference.py b/src/llama_stack_api/inference.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/inference.py
rename to src/llama_stack_api/inference.py
diff --git a/src/llama-stack-api/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/inspect.py
rename to src/llama_stack_api/inspect.py
diff --git a/src/llama-stack-api/llama_stack_api/models.py b/src/llama_stack_api/models.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/models.py
rename to src/llama_stack_api/models.py
diff --git a/src/llama-stack-api/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/openai_responses.py
rename to src/llama_stack_api/openai_responses.py
diff --git a/src/llama-stack-api/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/post_training.py
rename to src/llama_stack_api/post_training.py
diff --git a/src/llama-stack-api/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/prompts.py
rename to src/llama_stack_api/prompts.py
diff --git a/src/llama-stack-api/llama_stack_api/providers.py b/src/llama_stack_api/providers.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/providers.py
rename to src/llama_stack_api/providers.py
diff --git a/src/llama-stack-api/llama_stack_api/py.typed b/src/llama_stack_api/py.typed
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/py.typed
rename to src/llama_stack_api/py.typed
diff --git a/src/llama-stack-api/pyproject.toml b/src/llama_stack_api/pyproject.toml
similarity index 99%
rename from src/llama-stack-api/pyproject.toml
rename to src/llama_stack_api/pyproject.toml
index a00472d36..0ceb2bb4e 100644
--- a/src/llama-stack-api/pyproject.toml
+++ b/src/llama_stack_api/pyproject.toml
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
 
 [project]
 name = "llama-stack-api"
-version = "0.1.0"
+version = "0.4.0.dev0"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
 readme = "README.md"
diff --git a/src/llama-stack-api/llama_stack_api/rag_tool.py b/src/llama_stack_api/rag_tool.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/rag_tool.py
rename to src/llama_stack_api/rag_tool.py
diff --git a/src/llama-stack-api/llama_stack_api/resource.py b/src/llama_stack_api/resource.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/resource.py
rename to src/llama_stack_api/resource.py
diff --git a/src/llama-stack-api/llama_stack_api/safety.py b/src/llama_stack_api/safety.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/safety.py
rename to src/llama_stack_api/safety.py
diff --git a/src/llama-stack-api/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/schema_utils.py
rename to src/llama_stack_api/schema_utils.py
diff --git a/src/llama-stack-api/llama_stack_api/scoring.py b/src/llama_stack_api/scoring.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/scoring.py
rename to src/llama_stack_api/scoring.py
diff --git a/src/llama-stack-api/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/scoring_functions.py
rename to src/llama_stack_api/scoring_functions.py
diff --git a/src/llama-stack-api/llama_stack_api/shields.py b/src/llama_stack_api/shields.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/shields.py
rename to src/llama_stack_api/shields.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py b/src/llama_stack_api/strong_typing/__init__.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/__init__.py
rename to src/llama_stack_api/strong_typing/__init__.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py b/src/llama_stack_api/strong_typing/auxiliary.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
rename to src/llama_stack_api/strong_typing/auxiliary.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py b/src/llama_stack_api/strong_typing/classdef.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
rename to src/llama_stack_api/strong_typing/classdef.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/core.py b/src/llama_stack_api/strong_typing/core.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/core.py
rename to src/llama_stack_api/strong_typing/core.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py b/src/llama_stack_api/strong_typing/deserializer.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
rename to src/llama_stack_api/strong_typing/deserializer.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py b/src/llama_stack_api/strong_typing/docstring.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
rename to src/llama_stack_api/strong_typing/docstring.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py b/src/llama_stack_api/strong_typing/exception.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/exception.py
rename to src/llama_stack_api/strong_typing/exception.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py b/src/llama_stack_api/strong_typing/inspection.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
rename to src/llama_stack_api/strong_typing/inspection.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py b/src/llama_stack_api/strong_typing/mapping.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
rename to src/llama_stack_api/strong_typing/mapping.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/name.py b/src/llama_stack_api/strong_typing/name.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/name.py
rename to src/llama_stack_api/strong_typing/name.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/py.typed
rename to src/llama_stack_api/strong_typing/py.typed
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py b/src/llama_stack_api/strong_typing/schema.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/schema.py
rename to src/llama_stack_api/strong_typing/schema.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py b/src/llama_stack_api/strong_typing/serialization.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
rename to src/llama_stack_api/strong_typing/serialization.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py b/src/llama_stack_api/strong_typing/serializer.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
rename to src/llama_stack_api/strong_typing/serializer.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py b/src/llama_stack_api/strong_typing/slots.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/slots.py
rename to src/llama_stack_api/strong_typing/slots.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py b/src/llama_stack_api/strong_typing/topological.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/topological.py
rename to src/llama_stack_api/strong_typing/topological.py
diff --git a/src/llama-stack-api/llama_stack_api/tools.py b/src/llama_stack_api/tools.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/tools.py
rename to src/llama_stack_api/tools.py
diff --git a/src/llama_stack_api/uv.lock b/src/llama_stack_api/uv.lock
new file mode 100644
index 000000000..d61eb9be7
--- /dev/null
+++ b/src/llama_stack_api/uv.lock
@@ -0,0 +1,498 @@
+version = 1
+revision = 3
+requires-python = ">=3.12"
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
+]
+
+[[package]]
+name = "attrs"
+version = "25.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" }
+wheels = [
"https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "certifi" +version = "2025.11.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = 
"2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = 
"2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = 
"2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.72.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" 
} +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "llama-stack-api" +version = "0.4.0.dev0" +source = { editable = "." } +dependencies = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, + { name = "pydantic", specifier = ">=2.11.9" }, +] + +[[package]] +name = "opentelemetry-api" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size 
= 17282, upload-time = "2025-10-16T08:35:54.422Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.59b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" }, +] + +[[package]] +name = "protobuf" +version = "6.33.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/03/a1440979a3f74f16cab3b75b0da1a1a7f922d56a8ddea96092391998edc0/protobuf-6.33.1.tar.gz", hash = "sha256:97f65757e8d09870de6fd973aeddb92f85435607235d20b2dfed93405d00c85b", size = 443432, upload-time = "2025-11-13T16:44:18.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/f1/446a9bbd2c60772ca36556bac8bfde40eceb28d9cc7838755bc41e001d8f/protobuf-6.33.1-cp310-abi3-win32.whl", hash = 
"sha256:f8d3fdbc966aaab1d05046d0240dd94d40f2a8c62856d41eaa141ff64a79de6b", size = 425593, upload-time = "2025-11-13T16:44:06.275Z" }, + { url = "https://files.pythonhosted.org/packages/a6/79/8780a378c650e3df849b73de8b13cf5412f521ca2ff9b78a45c247029440/protobuf-6.33.1-cp310-abi3-win_amd64.whl", hash = "sha256:923aa6d27a92bf44394f6abf7ea0500f38769d4b07f4be41cb52bd8b1123b9ed", size = 436883, upload-time = "2025-11-13T16:44:09.222Z" }, + { url = "https://files.pythonhosted.org/packages/cd/93/26213ff72b103ae55bb0d73e7fb91ea570ef407c3ab4fd2f1f27cac16044/protobuf-6.33.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:fe34575f2bdde76ac429ec7b570235bf0c788883e70aee90068e9981806f2490", size = 427522, upload-time = "2025-11-13T16:44:10.475Z" }, + { url = "https://files.pythonhosted.org/packages/c2/32/df4a35247923393aa6b887c3b3244a8c941c32a25681775f96e2b418f90e/protobuf-6.33.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:f8adba2e44cde2d7618996b3fc02341f03f5bc3f2748be72dc7b063319276178", size = 324445, upload-time = "2025-11-13T16:44:11.869Z" }, + { url = "https://files.pythonhosted.org/packages/8e/d0/d796e419e2ec93d2f3fa44888861c3f88f722cde02b7c3488fcc6a166820/protobuf-6.33.1-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:0f4cf01222c0d959c2b399142deb526de420be8236f22c71356e2a544e153c53", size = 339161, upload-time = "2025-11-13T16:44:12.778Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/3c5f05a4af06649547027d288747f68525755de692a26a7720dced3652c0/protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:8fd7d5e0eb08cd5b87fd3df49bc193f5cfd778701f47e11d127d0afc6c39f1d1", size = 323171, upload-time = "2025-11-13T16:44:14.035Z" }, + { url = "https://files.pythonhosted.org/packages/08/b4/46310463b4f6ceef310f8348786f3cff181cea671578e3d9743ba61a459e/protobuf-6.33.1-py3-none-any.whl", hash = "sha256:d595a9fd694fdeb061a62fbe10eb039cc1e444df81ec9bb70c7fc59ebcb1eafa", size = 170477, upload-time = "2025-11-13T16:44:17.633Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, 
upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, 
upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = 
"2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = 
"sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] 
+name = "rpds-py" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" }, + { url = "https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" }, + { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" }, + { url = "https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" }, + { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" }, + { url = "https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" }, + { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" }, + { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" }, + { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" }, + { url = "https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" }, + { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" }, + { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" }, + { url = "https://files.pythonhosted.org/packages/d3/03/ce566d92611dfac0085c2f4b048cd53ed7c274a5c05974b882a908d540a2/rpds_py-0.28.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e9e184408a0297086f880556b6168fa927d677716f83d3472ea333b42171ee3b", size = 366235, upload-time = "2025-10-22T22:22:28.397Z" }, + { url = "https://files.pythonhosted.org/packages/00/34/1c61da1b25592b86fd285bd7bd8422f4c9d748a7373b46126f9ae792a004/rpds_py-0.28.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:edd267266a9b0448f33dc465a97cfc5d467594b600fe28e7fa2f36450e03053a", size = 348241, upload-time = "2025-10-22T22:22:30.171Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/ed1e28616848c61c493a067779633ebf4b569eccaacf9ccbdc0e7cba2b9d/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85beb8b3f45e4e32f6802fb6cd6b17f615ef6c6a52f265371fb916fae02814aa", size = 378079, upload-time = "2025-10-22T22:22:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/11/b2/ccb30333a16a470091b6e50289adb4d3ec656fd9951ba8c5e3aaa0746a67/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d2412be8d00a1b895f8ad827cc2116455196e20ed994bb704bf138fe91a42724", size = 393151, upload-time = "2025-10-22T22:22:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d0/73e2217c3ee486d555cb84920597480627d8c0240ff3062005c6cc47773e/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf128350d384b777da0e68796afdcebc2e9f63f0e9f242217754e647f6d32491", size = 517520, upload-time = "2025-10-22T22:22:34.949Z" }, + { url = "https://files.pythonhosted.org/packages/c4/91/23efe81c700427d0841a4ae7ea23e305654381831e6029499fe80be8a071/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a2036d09b363aa36695d1cc1a97b36865597f4478470b0697b5ee9403f4fe399", size = 408699, upload-time = "2025-10-22T22:22:36.584Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ee/a324d3198da151820a326c1f988caaa4f37fc27955148a76fff7a2d787a9/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8e1e9be4fa6305a16be628959188e4fd5cd6f1b0e724d63c6d8b2a8adf74ea6", size = 385720, upload-time = "2025-10-22T22:22:38.014Z" }, + { url = "https://files.pythonhosted.org/packages/19/ad/e68120dc05af8b7cab4a789fccd8cdcf0fe7e6581461038cc5c164cd97d2/rpds_py-0.28.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0a403460c9dd91a7f23fc3188de6d8977f1d9603a351d5db6cf20aaea95b538d", size = 401096, upload-time = "2025-10-22T22:22:39.869Z" }, + { url = "https://files.pythonhosted.org/packages/99/90/c1e070620042459d60df6356b666bb1f62198a89d68881816a7ed121595a/rpds_py-0.28.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7366b6553cdc805abcc512b849a519167db8f5e5c3472010cd1228b224265cb", size = 411465, upload-time = "2025-10-22T22:22:41.395Z" }, + { url = "https://files.pythonhosted.org/packages/68/61/7c195b30d57f1b8d5970f600efee72a4fad79ec829057972e13a0370fd24/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b43c6a3726efd50f18d8120ec0551241c38785b68952d240c45ea553912ac41", size = 558832, upload-time = "2025-10-22T22:22:42.871Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3d/06f3a718864773f69941d4deccdf18e5e47dd298b4628062f004c10f3b34/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0cb7203c7bc69d7c1585ebb33a2e6074492d2fc21ad28a7b9d40457ac2a51ab7", size = 583230, upload-time = "2025-10-22T22:22:44.877Z" }, + { url = "https://files.pythonhosted.org/packages/66/df/62fc783781a121e77fee9a21ead0a926f1b652280a33f5956a5e7833ed30/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a52a5169c664dfb495882adc75c304ae1d50df552fbd68e100fdc719dee4ff9", size = 553268, upload-time = "2025-10-22T22:22:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/84/85/d34366e335140a4837902d3dea89b51f087bd6a63c993ebdff59e93ee61d/rpds_py-0.28.0-cp313-cp313-win32.whl", hash = "sha256:2e42456917b6687215b3e606ab46aa6bca040c77af7df9a08a6dcfe8a4d10ca5", size = 217100, upload-time = "2025-10-22T22:22:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/3c/1c/f25a3f3752ad7601476e3eff395fe075e0f7813fbb9862bd67c82440e880/rpds_py-0.28.0-cp313-cp313-win_amd64.whl", hash = "sha256:e0a0311caedc8069d68fc2bf4c9019b58a2d5ce3cd7cb656c845f1615b577e1e", size = 227759, upload-time = "2025-10-22T22:22:50.219Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d6/5f39b42b99615b5bc2f36ab90423ea404830bdfee1c706820943e9a645eb/rpds_py-0.28.0-cp313-cp313-win_arm64.whl", hash = "sha256:04c1b207ab8b581108801528d59ad80aa83bb170b35b0ddffb29c20e411acdc1", size = 217326, upload-time = "2025-10-22T22:22:51.647Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8b/0c69b72d1cee20a63db534be0df271effe715ef6c744fdf1ff23bb2b0b1c/rpds_py-0.28.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f296ea3054e11fc58ad42e850e8b75c62d9a93a9f981ad04b2e5ae7d2186ff9c", size = 355736, upload-time = "2025-10-22T22:22:53.211Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6d/0c2ee773cfb55c31a8514d2cece856dd299170a49babd50dcffb15ddc749/rpds_py-0.28.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5a7306c19b19005ad98468fcefeb7100b19c79fc23a5f24a12e06d91181193fa", size = 342677, upload-time = 
"2025-10-22T22:22:54.723Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1c/22513ab25a27ea205144414724743e305e8153e6abe81833b5e678650f5a/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d9b86aa501fed9862a443c5c3116f6ead8bc9296185f369277c42542bd646b", size = 371847, upload-time = "2025-10-22T22:22:56.295Z" }, + { url = "https://files.pythonhosted.org/packages/60/07/68e6ccdb4b05115ffe61d31afc94adef1833d3a72f76c9632d4d90d67954/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e5bbc701eff140ba0e872691d573b3d5d30059ea26e5785acba9132d10c8c31d", size = 381800, upload-time = "2025-10-22T22:22:57.808Z" }, + { url = "https://files.pythonhosted.org/packages/73/bf/6d6d15df80781d7f9f368e7c1a00caf764436518c4877fb28b029c4624af/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5690671cd672a45aa8616d7374fdf334a1b9c04a0cac3c854b1136e92374fe", size = 518827, upload-time = "2025-10-22T22:22:59.826Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d3/2decbb2976cc452cbf12a2b0aaac5f1b9dc5dd9d1f7e2509a3ee00421249/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f1d92ecea4fa12f978a367c32a5375a1982834649cdb96539dcdc12e609ab1a", size = 399471, upload-time = "2025-10-22T22:23:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/b1/2c/f30892f9e54bd02e5faca3f6a26d6933c51055e67d54818af90abed9748e/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d252db6b1a78d0a3928b6190156042d54c93660ce4d98290d7b16b5296fb7cc", size = 377578, upload-time = "2025-10-22T22:23:03.52Z" }, + { url = "https://files.pythonhosted.org/packages/f0/5d/3bce97e5534157318f29ac06bf2d279dae2674ec12f7cb9c12739cee64d8/rpds_py-0.28.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d61b355c3275acb825f8777d6c4505f42b5007e357af500939d4a35b19177259", size = 390482, upload-time = "2025-10-22T22:23:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f0/886bd515ed457b5bd93b166175edb80a0b21a210c10e993392127f1e3931/rpds_py-0.28.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acbe5e8b1026c0c580d0321c8aae4b0a1e1676861d48d6e8c6586625055b606a", size = 402447, upload-time = "2025-10-22T22:23:06.93Z" }, + { url = "https://files.pythonhosted.org/packages/42/b5/71e8777ac55e6af1f4f1c05b47542a1eaa6c33c1cf0d300dca6a1c6e159a/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8aa23b6f0fc59b85b4c7d89ba2965af274346f738e8d9fc2455763602e62fd5f", size = 552385, upload-time = "2025-10-22T22:23:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cb/6ca2d70cbda5a8e36605e7788c4aa3bea7c17d71d213465a5a675079b98d/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7b14b0c680286958817c22d76fcbca4800ddacef6f678f3a7c79a1fe7067fe37", size = 575642, upload-time = "2025-10-22T22:23:10.348Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/407ad9960ca7856d7b25c96dcbe019270b5ffdd83a561787bc682c797086/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bcf1d210dfee61a6c86551d67ee1031899c0fdbae88b2d44a569995d43797712", size = 544507, upload-time = "2025-10-22T22:23:12.434Z" }, + { url = "https://files.pythonhosted.org/packages/51/31/2f46fe0efcac23fbf5797c6b6b7e1c76f7d60773e525cb65fcbc582ee0f2/rpds_py-0.28.0-cp313-cp313t-win32.whl", hash = "sha256:3aa4dc0fdab4a7029ac63959a3ccf4ed605fee048ba67ce89ca3168da34a1342", size = 205376, 
upload-time = "2025-10-22T22:23:13.979Z" }, + { url = "https://files.pythonhosted.org/packages/92/e4/15947bda33cbedfc134490a41841ab8870a72a867a03d4969d886f6594a2/rpds_py-0.28.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7b7d9d83c942855e4fdcfa75d4f96f6b9e272d42fffcb72cd4bb2577db2e2907", size = 215907, upload-time = "2025-10-22T22:23:15.5Z" }, + { url = "https://files.pythonhosted.org/packages/08/47/ffe8cd7a6a02833b10623bf765fbb57ce977e9a4318ca0e8cf97e9c3d2b3/rpds_py-0.28.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:dcdcb890b3ada98a03f9f2bb108489cdc7580176cb73b4f2d789e9a1dac1d472", size = 353830, upload-time = "2025-10-22T22:23:17.03Z" }, + { url = "https://files.pythonhosted.org/packages/f9/9f/890f36cbd83a58491d0d91ae0db1702639edb33fb48eeb356f80ecc6b000/rpds_py-0.28.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f274f56a926ba2dc02976ca5b11c32855cbd5925534e57cfe1fda64e04d1add2", size = 341819, upload-time = "2025-10-22T22:23:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/09/e3/921eb109f682aa24fb76207698fbbcf9418738f35a40c21652c29053f23d/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fe0438ac4a29a520ea94c8c7f1754cdd8feb1bc490dfda1bfd990072363d527", size = 373127, upload-time = "2025-10-22T22:23:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/23/13/bce4384d9f8f4989f1a9599c71b7a2d877462e5fd7175e1f69b398f729f4/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a358a32dd3ae50e933347889b6af9a1bdf207ba5d1a3f34e1a38cd3540e6733", size = 382767, upload-time = "2025-10-22T22:23:21.787Z" }, + { url = "https://files.pythonhosted.org/packages/23/e1/579512b2d89a77c64ccef5a0bc46a6ef7f72ae0cf03d4b26dcd52e57ee0a/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e80848a71c78aa328fefaba9c244d588a342c8e03bda518447b624ea64d1ff56", size = 517585, upload-time = "2025-10-22T22:23:23.699Z" }, + { url = "https://files.pythonhosted.org/packages/62/3c/ca704b8d324a2591b0b0adcfcaadf9c862375b11f2f667ac03c61b4fd0a6/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f586db2e209d54fe177e58e0bc4946bea5fb0102f150b1b2f13de03e1f0976f8", size = 399828, upload-time = "2025-10-22T22:23:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/da/37/e84283b9e897e3adc46b4c88bb3f6ec92a43bd4d2f7ef5b13459963b2e9c/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae8ee156d6b586e4292491e885d41483136ab994e719a13458055bec14cf370", size = 375509, upload-time = "2025-10-22T22:23:27.32Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c2/a980beab869d86258bf76ec42dec778ba98151f253a952b02fe36d72b29c/rpds_py-0.28.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:a805e9b3973f7e27f7cab63a6b4f61d90f2e5557cff73b6e97cd5b8540276d3d", size = 392014, upload-time = "2025-10-22T22:23:29.332Z" }, + { url = "https://files.pythonhosted.org/packages/da/b5/b1d3c5f9d3fa5aeef74265f9c64de3c34a0d6d5cd3c81c8b17d5c8f10ed4/rpds_py-0.28.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d3fd16b6dc89c73a4da0b4ac8b12a7ecc75b2864b95c9e5afed8003cb50a728", size = 402410, upload-time = "2025-10-22T22:23:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/74/ae/cab05ff08dfcc052afc73dcb38cbc765ffc86f94e966f3924cd17492293c/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6796079e5d24fdaba6d49bda28e2c47347e89834678f2bc2c1b4fc1489c0fb01", size = 553593, 
upload-time = "2025-10-22T22:23:32.834Z" }, + { url = "https://files.pythonhosted.org/packages/70/80/50d5706ea2a9bfc9e9c5f401d91879e7c790c619969369800cde202da214/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:76500820c2af232435cbe215e3324c75b950a027134e044423f59f5b9a1ba515", size = 576925, upload-time = "2025-10-22T22:23:34.47Z" }, + { url = "https://files.pythonhosted.org/packages/ab/12/85a57d7a5855a3b188d024b099fd09c90db55d32a03626d0ed16352413ff/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bbdc5640900a7dbf9dd707fe6388972f5bbd883633eb68b76591044cfe346f7e", size = 542444, upload-time = "2025-10-22T22:23:36.093Z" }, + { url = "https://files.pythonhosted.org/packages/6c/65/10643fb50179509150eb94d558e8837c57ca8b9adc04bd07b98e57b48f8c/rpds_py-0.28.0-cp314-cp314-win32.whl", hash = "sha256:adc8aa88486857d2b35d75f0640b949759f79dc105f50aa2c27816b2e0dd749f", size = 207968, upload-time = "2025-10-22T22:23:37.638Z" }, + { url = "https://files.pythonhosted.org/packages/b4/84/0c11fe4d9aaea784ff4652499e365963222481ac647bcd0251c88af646eb/rpds_py-0.28.0-cp314-cp314-win_amd64.whl", hash = "sha256:66e6fa8e075b58946e76a78e69e1a124a21d9a48a5b4766d15ba5b06869d1fa1", size = 218876, upload-time = "2025-10-22T22:23:39.179Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/3ab3b86ded7bb18478392dc3e835f7b754cd446f62f3fc96f4fe2aca78f6/rpds_py-0.28.0-cp314-cp314-win_arm64.whl", hash = "sha256:a6fe887c2c5c59413353b7c0caff25d0e566623501ccfff88957fa438a69377d", size = 212506, upload-time = "2025-10-22T22:23:40.755Z" }, + { url = "https://files.pythonhosted.org/packages/51/ec/d5681bb425226c3501eab50fc30e9d275de20c131869322c8a1729c7b61c/rpds_py-0.28.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7a69df082db13c7070f7b8b1f155fa9e687f1d6aefb7b0e3f7231653b79a067b", size = 355433, upload-time = "2025-10-22T22:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/568c5e689e1cfb1ea8b875cffea3649260955f677fdd7ddc6176902d04cd/rpds_py-0.28.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b1cde22f2c30ebb049a9e74c5374994157b9b70a16147d332f89c99c5960737a", size = 342601, upload-time = "2025-10-22T22:23:44.372Z" }, + { url = "https://files.pythonhosted.org/packages/32/fe/51ada84d1d2a1d9d8f2c902cfddd0133b4a5eb543196ab5161d1c07ed2ad/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5338742f6ba7a51012ea470bd4dc600a8c713c0c72adaa0977a1b1f4327d6592", size = 372039, upload-time = "2025-10-22T22:23:46.025Z" }, + { url = "https://files.pythonhosted.org/packages/07/c1/60144a2f2620abade1a78e0d91b298ac2d9b91bc08864493fa00451ef06e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1460ebde1bcf6d496d80b191d854adedcc619f84ff17dc1c6d550f58c9efbba", size = 382407, upload-time = "2025-10-22T22:23:48.098Z" }, + { url = "https://files.pythonhosted.org/packages/45/ed/091a7bbdcf4038a60a461df50bc4c82a7ed6d5d5e27649aab61771c17585/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e3eb248f2feba84c692579257a043a7699e28a77d86c77b032c1d9fbb3f0219c", size = 518172, upload-time = "2025-10-22T22:23:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/54/dd/02cc90c2fd9c2ef8016fd7813bfacd1c3a1325633ec8f244c47b449fc868/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3bbba5def70b16cd1c1d7255666aad3b290fbf8d0fe7f9f91abafb73611a91", size = 399020, upload-time = "2025-10-22T22:23:51.81Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/81/5d98cc0329bbb911ccecd0b9e19fbf7f3a5de8094b4cda5e71013b2dd77e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3114f4db69ac5a1f32e7e4d1cbbe7c8f9cf8217f78e6e002cedf2d54c2a548ed", size = 377451, upload-time = "2025-10-22T22:23:53.711Z" }, + { url = "https://files.pythonhosted.org/packages/b4/07/4d5bcd49e3dfed2d38e2dcb49ab6615f2ceb9f89f5a372c46dbdebb4e028/rpds_py-0.28.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4b0cb8a906b1a0196b863d460c0222fb8ad0f34041568da5620f9799b83ccf0b", size = 390355, upload-time = "2025-10-22T22:23:55.299Z" }, + { url = "https://files.pythonhosted.org/packages/3f/79/9f14ba9010fee74e4f40bf578735cfcbb91d2e642ffd1abe429bb0b96364/rpds_py-0.28.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf681ac76a60b667106141e11a92a3330890257e6f559ca995fbb5265160b56e", size = 403146, upload-time = "2025-10-22T22:23:56.929Z" }, + { url = "https://files.pythonhosted.org/packages/39/4c/f08283a82ac141331a83a40652830edd3a4a92c34e07e2bbe00baaea2f5f/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1e8ee6413cfc677ce8898d9cde18cc3a60fc2ba756b0dec5b71eb6eb21c49fa1", size = 552656, upload-time = "2025-10-22T22:23:58.62Z" }, + { url = "https://files.pythonhosted.org/packages/61/47/d922fc0666f0dd8e40c33990d055f4cc6ecff6f502c2d01569dbed830f9b/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b3072b16904d0b5572a15eb9d31c1954e0d3227a585fc1351aa9878729099d6c", size = 576782, upload-time = "2025-10-22T22:24:00.312Z" }, + { url = "https://files.pythonhosted.org/packages/d3/0c/5bafdd8ccf6aa9d3bfc630cfece457ff5b581af24f46a9f3590f790e3df2/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b670c30fd87a6aec281c3c9896d3bae4b205fd75d79d06dc87c2503717e46092", size = 544671, upload-time = "2025-10-22T22:24:02.297Z" }, + { url = "https://files.pythonhosted.org/packages/2c/37/dcc5d8397caa924988693519069d0beea077a866128719351a4ad95e82fc/rpds_py-0.28.0-cp314-cp314t-win32.whl", hash = "sha256:8014045a15b4d2b3476f0a287fcc93d4f823472d7d1308d47884ecac9e612be3", size = 205749, upload-time = "2025-10-22T22:24:03.848Z" }, + { url = "https://files.pythonhosted.org/packages/d7/69/64d43b21a10d72b45939a28961216baeb721cc2a430f5f7c3bfa21659a53/rpds_py-0.28.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a4e59c90d9c27c561eb3160323634a9ff50b04e4f7820600a2beb0ac90db578", size = 216233, upload-time = "2025-10-22T22:24:05.471Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +] diff --git a/src/llama-stack-api/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_io.py rename to src/llama_stack_api/vector_io.py diff --git a/src/llama-stack-api/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_stores.py rename to src/llama_stack_api/vector_stores.py diff --git a/src/llama-stack-api/llama_stack_api/version.py b/src/llama_stack_api/version.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/version.py rename to src/llama_stack_api/version.py diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index b9c0ac916..4dc5b7993 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,6 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest + from llama_stack_api import OpenAIFilePurpose diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 61878ac4c..1f19c88c5 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,9 +9,9 @@ from unittest.mock import patch import pytest import requests -from llama_stack_api import OpenAIFilePurpose from llama_stack.core.datatypes import User +from llama_stack_api import OpenAIFilePurpose purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py 
b/tests/integration/inference/test_provider_data_routing.py index d007b57d6..e4a0a24b5 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,6 +15,9 @@ that enables routing based on provider_data alone. from unittest.mock import AsyncMock, patch import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack_api import ( Api, OpenAIAssistantMessageParam, @@ -23,9 +26,6 @@ from llama_stack_api import ( OpenAIChoice, ) -from llama_stack.core.library_client import LlamaStackAsLibraryClient -from llama_stack.core.telemetry.telemetry import MetricEvent - class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion): metrics: list[MetricEvent] | None = None diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index ff6925b58..e6868019a 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,6 +9,8 @@ import time import uuid import pytest + +from llama_stack.log import get_logger from llama_stack_api import ( DataConfig, DatasetFormat, @@ -18,8 +20,6 @@ from llama_stack_api import ( TrainingConfig, ) -from llama_stack.log import get_logger - # Configure logging logger = get_logger(name=__name__, category="post_training") diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 99b4982f0..a554752cd 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,9 +12,9 @@ import warnings from collections.abc import Generator import pytest -from llama_stack_api import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack_api import ViolationLevel # Llama Guard models available for text and vision shields LLAMA_GUARD_TEXT_MODELS = [CoreModelId.llama_guard_4_12b.value] diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6a926f1d5..857ff2f81 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,6 +7,7 @@ import base64 import mimetypes import pytest + from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index b85a23263..dc7b7e1ad 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,6 +9,7 @@ import mimetypes import os import pytest + from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 1b1b6ef28..036a5f018 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,9 +7,9 @@ import re import pytest -from llama_stack_api import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack_api import ToolGroupNotFoundError from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 
c65dfecac..102f3f00c 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,12 +8,12 @@ import time from io import BytesIO import pytest -from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from llama_stack_api import Chunk, ExpiresAfter from ..conftest import vector_provider_wrapper diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index acaa44bcb..29dbd3e56 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import pytest + from llama_stack_api import Chunk from ..conftest import vector_provider_wrapper diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 2f942eb9c..95c54d379 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -8,7 +8,6 @@ import tempfile from pathlib import Path import pytest -from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage from openai.types.conversations.conversation import Conversation as OpenAIConversation from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem from pydantic import TypeAdapter @@ -25,6 +24,7 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage @pytest.fixture diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py index 7e465513e..1b24a59a2 100644 --- a/tests/unit/core/routers/test_safety_router.py +++ b/tests/unit/core/routers/test_safety_router.py @@ -6,10 +6,9 @@ from unittest.mock import AsyncMock -from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield - from llama_stack.core.datatypes import SafetyConfig from llama_stack.core.routers.safety import SafetyRouter +from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield async def test_run_moderation_uses_default_shield_when_model_missing(): diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 03bc1ff5f..a6df0694b 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -7,6 +7,8 @@ from unittest.mock import AsyncMock, Mock import pytest + +from llama_stack.core.routers.vector_io import VectorIORouter from llama_stack_api import ( ModelNotFoundError, ModelType, @@ -14,8 +16,6 @@ from llama_stack_api import ( OpenAICreateVectorStoreRequestWithExtraBody, ) -from llama_stack.core.routers.vector_io import VectorIORouter - async def test_single_provider_auto_selection(): # provider_id automatically selected during vector store create() when only one provider available @@ -127,7 +127,8 @@ async def test_update_vector_store_same_provider_id_succeeds(): async def test_create_vector_store_with_unknown_embedding_model_raises_error(): - """Test that creating a vector store with an unknown embedding model raises 
ModelNotFoundError.""" + """Test that creating a vector store with an unknown embedding model raises + FoundError.""" mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None) diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index acb31e1c9..462a25c8b 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -9,10 +9,10 @@ from unittest.mock import AsyncMock import pytest -from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config +from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield class TestVectorStoresValidation: diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 2405d536e..8fd9d6ec3 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -9,6 +9,14 @@ from unittest.mock import AsyncMock import pytest + +from llama_stack.core.datatypes import RegistryEntrySource +from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable +from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable +from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable +from llama_stack.core.routing_tables.shields import ShieldsRoutingTable +from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable from llama_stack_api import ( URL, Api, @@ -25,14 +33,6 @@ from llama_stack_api import ( URIDataSource, ) -from llama_stack.core.datatypes import RegistryEntrySource -from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable -from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable -from llama_stack.core.routing_tables.models import ModelsRoutingTable -from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable -from llama_stack.core.routing_tables.shields import ShieldsRoutingTable -from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable - class Impl: def __init__(self, api: Api): diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py index f66b57df8..889f063e6 100644 --- a/tests/unit/distribution/test_api_recordings.py +++ b/tests/unit/distribution/test_api_recordings.py @@ -9,6 +9,14 @@ from pathlib import Path from unittest.mock import patch import pytest +from openai import AsyncOpenAI + +from llama_stack.testing.api_recorder import ( + APIRecordingMode, + ResponseStorage, + api_recording, + normalize_inference_request, +) # Import the real Pydantic response types instead of using Mocks from llama_stack_api import ( @@ -19,14 +27,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) -from openai import AsyncOpenAI - -from llama_stack.testing.api_recorder import ( - APIRecordingMode, - ResponseStorage, - api_recording, - normalize_inference_request, -) @pytest.fixture diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py 
index a27455e24..b8d6ba55d 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -9,7 +9,6 @@ from unittest.mock import patch import pytest import yaml -from llama_stack_api import ProviderSpec from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig @@ -23,6 +22,7 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) +from llama_stack_api import ProviderSpec class SampleConfig(BaseModel): @@ -395,9 +395,8 @@ pip_packages: def test_external_provider_from_module_building(self, mock_providers): """Test loading an external provider from a module during build (building=True, partial spec).""" - from llama_stack_api import Api - from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec + from llama_stack_api import Api # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec build_config = BuildConfig( @@ -457,9 +456,8 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace - from llama_stack_api import ProviderSpec - from llama_stack.core.distribution import get_external_providers_from_module + from llama_stack_api import ProviderSpec fake_spec = ProviderSpec( api=Api.inference, @@ -595,9 +593,8 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace - from llama_stack_api import ProviderSpec - from llama_stack.core.distribution import get_external_providers_from_module + from llama_stack_api import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -644,9 +641,8 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace - from llama_stack_api import ProviderSpec - from llama_stack.core.distribution import get_external_providers_from_module + from llama_stack_api import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -693,9 +689,8 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace - from llama_stack_api import ProviderSpec - from llama_stack.core.distribution import get_external_providers_from_module + from llama_stack_api import ProviderSpec # Module returns both inline and remote variants spec1 = ProviderSpec( @@ -833,9 +828,8 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace - from llama_stack_api import ProviderSpec - from llama_stack.core.distribution import get_external_providers_from_module + from llama_stack_api import ProviderSpec inference_spec = ProviderSpec( api=Api.inference, diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index 080d1ddbe..793f4edd3 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -6,7 +6,6 @@ import pytest -from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference @@ -15,6 +14,7 @@ from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImplConfig, ) from llama_stack.providers.utils.sqlstore.sqlstore import 
register_sqlstore_backends +from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError class MockUploadFile: diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py index 3c93a578d..32d59234d 100644 --- a/tests/unit/providers/batches/test_reference.py +++ b/tests/unit/providers/batches/test_reference.py @@ -58,6 +58,7 @@ import json from unittest.mock import AsyncMock, MagicMock import pytest + from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py index 4cd5d962d..acb7ca01c 100644 --- a/tests/unit/providers/batches/test_reference_idempotency.py +++ b/tests/unit/providers/batches/test_reference_idempotency.py @@ -43,6 +43,7 @@ Key Behaviors Tested: import asyncio import pytest + from llama_stack_api import ConflictError diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py index ae63c1a78..de6c92e9c 100644 --- a/tests/unit/providers/files/test_s3_files.py +++ b/tests/unit/providers/files/test_s3_files.py @@ -8,6 +8,7 @@ from unittest.mock import patch import pytest from botocore.exceptions import ClientError + from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py index 873db4e27..e113611bd 100644 --- a/tests/unit/providers/files/test_s3_files_auth.py +++ b/tests/unit/providers/files/test_s3_files_auth.py @@ -7,10 +7,10 @@ from unittest.mock import patch import pytest -from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError from llama_stack.core.datatypes import User from llama_stack.providers.remote.files.s3.files import S3FilesImpl +from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError async def test_listing_hides_other_users_file(s3_provider, sample_text_file): diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index b3eecc558..a20f2860a 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -8,11 +8,11 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock import pytest -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody from openai import AuthenticationError from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig +from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody def test_adapter_initialization(): diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index e2a5455b7..958895cc4 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -9,6 +9,11 @@ import time from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch import pytest + +from llama_stack.core.routers.inference import InferenceRouter +from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig +from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter from llama_stack_api import ( HealthStatus, Model, @@ -22,11 +27,6 
@@ from llama_stack_api import ( ToolChoice, ) -from llama_stack.core.routers.inference import InferenceRouter -from llama_stack.core.routing_tables.models import ModelsRoutingTable -from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig -from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter - # These are unit test for the remote vllm provider # implementation. This should only contain tests which are specific to # the implementation details of those classes. More general diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py index 36d2b86a9..658132340 100644 --- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -7,12 +7,12 @@ from unittest.mock import AsyncMock import pytest -from llama_stack_api import ToolDef from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( convert_tooldef_to_chat_tool, ) from llama_stack.providers.inline.agents.meta_reference.responses.types import ChatCompletionContext +from llama_stack_api import ToolDef @pytest.fixture diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py index 0d9f1cc35..36006cc39 100644 --- a/tests/unit/providers/nvidia/test_datastore.py +++ b/tests/unit/providers/nvidia/test_datastore.py @@ -8,10 +8,10 @@ import os from unittest.mock import patch import pytest -from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter +from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource @pytest.fixture diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py index c41379801..783d664bf 100644 --- a/tests/unit/providers/nvidia/test_eval.py +++ b/tests/unit/providers/nvidia/test_eval.py @@ -8,6 +8,10 @@ import os from unittest.mock import MagicMock, patch import pytest + +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig +from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl from llama_stack_api import ( Benchmark, BenchmarkConfig, @@ -20,10 +24,6 @@ from llama_stack_api import ( TopPSamplingStrategy, ) -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig -from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl - MOCK_DATASET_ID = "default/test-dataset" MOCK_BENCHMARK_ID = "test-benchmark" diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index ba68a7abe..b714fc607 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -9,6 +9,12 @@ import warnings from unittest.mock import patch import pytest + +from llama_stack.core.library_client import convert_pydantic_to_json_value +from llama_stack.providers.remote.post_training.nvidia.post_training import ( + NvidiaPostTrainingAdapter, + NvidiaPostTrainingConfig, +) from llama_stack_api import ( DataConfig, DatasetFormat, @@ -19,12 +25,6 @@ from llama_stack_api 
import ( TrainingConfig, ) -from llama_stack.core.library_client import convert_pydantic_to_json_value -from llama_stack.providers.remote.post_training.nvidia.post_training import ( - NvidiaPostTrainingAdapter, - NvidiaPostTrainingConfig, -) - class TestNvidiaParameters: @pytest.fixture(autouse=True) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 8b313abcd..ee62910b8 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -8,11 +8,11 @@ from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest -from llama_stack_api import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ModelType class MockResponse: diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index ea6254841..07e04ddea 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -9,6 +9,9 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest + +from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig +from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIUserMessageParam, @@ -18,9 +21,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig -from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter - class FakeNVIDIASafetyAdapter(NVIDIASafetyAdapter): """Test implementation that provides the required shield_store.""" diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 4d0ce695b..94948da41 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -9,15 +9,6 @@ import warnings from unittest.mock import patch import pytest -from llama_stack_api import ( - DataConfig, - DatasetFormat, - LoraFinetuningConfig, - OptimizerConfig, - OptimizerType, - QATFinetuningConfig, - TrainingConfig, -) from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( @@ -27,6 +18,15 @@ from llama_stack.providers.remote.post_training.nvidia.post_training import ( NvidiaPostTrainingJob, NvidiaPostTrainingJobStatusResponse, ) +from llama_stack_api import ( + DataConfig, + DatasetFormat, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + QATFinetuningConfig, + TrainingConfig, +) @pytest.fixture diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py index df7453712..7126e1b69 100644 --- a/tests/unit/providers/test_bedrock.py +++ b/tests/unit/providers/test_bedrock.py @@ -7,10 +7,9 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, PropertyMock, patch -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody - from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig 
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody def test_can_create_adapter(): diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index b9b59bb79..5b13a75f4 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -10,12 +10,12 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch import pytest -from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam from pydantic import BaseModel, Field from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam class OpenAIMixinImpl(OpenAIMixin): diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py index a7c9289d7..ab5736ac5 100644 --- a/tests/unit/providers/utils/inference/test_prompt_adapter.py +++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam - from llama_stack.models.llama.datatypes import RawTextItem from llama_stack.providers.utils.inference.prompt_adapter import ( convert_openai_message_to_raw_message, ) +from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam class TestConvertOpenAIMessageToRawMessage: diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 00db5795a..f3241ba20 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -7,9 +7,9 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack_api import URL, RAGDocument, TextContentItem from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc +from llama_stack_api import URL, RAGDocument, TextContentItem async def test_content_from_doc_with_url(): diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 4a85cf8b8..1e3efafa1 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -34,9 +34,9 @@ # import pytest -from llama_stack_api import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack_api import Model @pytest.fixture diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 216e9b8ea..6408e25ab 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from 
llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig @@ -19,6 +18,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteV from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore EMBEDDING_DIMENSION = 768 COLLECTION_PREFIX = "test_collection" diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 0d5c1399f..075296cbb 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,13 +9,13 @@ from unittest.mock import MagicMock, patch import numpy as np import pytest -from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( FaissIndex, FaissVectorIOAdapter, ) +from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore # This test is a unit test for the FaissVectorIOAdapter class. This should only contain # tests which are specific to this class. More general (API-level) tests should be placed in diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 17a99ce1c..d1548cf37 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,13 +8,13 @@ import asyncio import numpy as np import pytest -from llama_stack_api import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, ) +from llama_stack_api import Chunk, QueryChunksResponse # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain # tests which are specific to this class. More general (API-level) tests should be placed in diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 7ba40eefb..3797abb2c 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -10,6 +10,8 @@ from unittest.mock import AsyncMock, patch import numpy as np import pytest + +from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX from llama_stack_api import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -21,8 +23,6 @@ from llama_stack_api import ( VectorStoreNotFoundError, ) -from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX - # This test is a unit test for the inline VectorIO providers. This should only contain # tests which are specific to this class. 
More general (API-level) tests should be placed in # tests/integration/vector_io/ @@ -255,10 +255,9 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter): async def test_document_id_with_invalid_type_raises_error(): """Ensure TypeError is raised when document_id is not a string.""" - from llama_stack_api import Chunk - # Integer document_id should raise TypeError from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id + from llama_stack_api import Chunk chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345}) with pytest.raises(TypeError) as exc_info: diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py index 678b76fbd..7f6b4af79 100644 --- a/tests/unit/providers/vector_io/test_vector_utils.py +++ b/tests/unit/providers/vector_io/test_vector_utils.py @@ -4,9 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import Chunk, ChunkMetadata - from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack_api import Chunk, ChunkMetadata # This test is a unit test for the chunk_utils.py helpers. This should only contain # tests which are specific to this file. More general (API-level) tests should be placed in diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index e3f5e46d7..7eb17b74b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -7,9 +7,9 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig class TestRagQuery: diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 23c12dcab..2562df8d6 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -12,7 +12,6 @@ from unittest.mock import AsyncMock, MagicMock import numpy as np import pytest -from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument from llama_stack.providers.utils.memory.vector_store import ( URL, @@ -22,6 +21,7 @@ from llama_stack.providers.utils.memory.vector_store import ( make_overlapped_chunks, ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf" # Depending on the machine, this can get parsed a couple of ways diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 01f486ab2..1b5032782 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -6,7 +6,6 @@ import pytest -from llama_stack_api import Model, VectorStore from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig @@ -16,6 +15,7 @@ from llama_stack.core.store.registry import ( DiskDistributionRegistry, ) from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends +from llama_stack_api import Model, VectorStore 
@pytest.fixture @@ -303,9 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry): async def test_double_registration_with_cache(cached_disk_dist_registry): """Test double registration behavior with caching enabled.""" - from llama_stack_api import ModelType - from llama_stack.core.datatypes import ModelWithOwner + from llama_stack_api import ModelType model1 = ModelWithOwner( identifier="test_model", diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 2827f60b9..a09d2a30d 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,10 +5,9 @@ # the root directory of this source tree. -from llama_stack_api import ModelType - from llama_stack.core.datatypes import ModelWithOwner, User from llama_stack.core.store.registry import CachedDiskDistributionRegistry +from llama_stack_api import ModelType async def test_registry_cache_with_acl(cached_disk_dist_registry): diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index 1df933d4d..23a9636d5 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -8,12 +8,12 @@ from unittest.mock import MagicMock, Mock, patch import pytest import yaml -from llama_stack_api import Api, ModelType from pydantic import TypeAdapter, ValidationError from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack_api import Api, ModelType class AsyncMock(MagicMock): diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 071178f96..8f8a61ea7 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,7 +9,6 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock -from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec from pydantic import BaseModel, Field from llama_stack.core.datatypes import Api, Provider, StackRunConfig @@ -27,6 +26,7 @@ from llama_stack.core.storage.datatypes import ( ) from llama_stack.providers.utils.kvstore import register_kvstore_backends from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index fdaf9022b..d82743c80 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -9,9 +9,9 @@ import logging # allow-direct-logging from unittest.mock import AsyncMock, MagicMock import pytest -from llama_stack_api import PaginatedResponse from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator +from llama_stack_api import PaginatedResponse @pytest.fixture diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py index 79e0b6e28..623955984 100644 --- a/tests/unit/tools/test_tools_json_schema.py +++ b/tests/unit/tools/test_tools_json_schema.py @@ -9,10 +9,10 @@ Unit tests for JSON Schema-based tool definitions. Tests the new input_schema and output_schema fields. 
""" -from llama_stack_api import ToolDef from pydantic import ValidationError from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition +from llama_stack_api import ToolDef class TestToolDefValidation: diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index 4da20b125..bdcc529ce 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -7,6 +7,10 @@ import time import pytest + +from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig +from llama_stack.providers.utils.inference.inference_store import InferenceStore +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -15,10 +19,6 @@ from llama_stack_api import ( Order, ) -from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig -from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends - @pytest.fixture(autouse=True) def setup_backends(tmp_path): diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 1119a93d8..8c108d9c1 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -9,11 +9,11 @@ from tempfile import TemporaryDirectory from uuid import uuid4 import pytest -from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order def build_store(db_path: str, policy: list | None = None) -> ResponsesStore: diff --git a/uv.lock b/uv.lock index ddf8c1cd4..0b8b555f6 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -2095,7 +2095,7 @@ requires-dist = [ { name = "httpx" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, - { name = "llama-stack-api", editable = "src/llama-stack-api" }, + { name = "llama-stack-api", editable = "src/llama_stack_api" }, { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, @@ -2230,8 +2230,8 @@ unit = [ [[package]] name = "llama-stack-api" -version = "0.1.0" -source = { editable = "src/llama-stack-api" } +version = "0.4.0.dev0" +source = { editable = "src/llama_stack_api" } dependencies = [ { name = "jsonschema" }, { name = "opentelemetry-exporter-otlp-proto-http" }, From dc49ad3f890f7091033ff83581d02c6209b2acf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 14 Nov 2025 17:47:37 +0100 Subject: [PATCH 29/62] chore: bump starlette version 
(#4158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Require at least 0.49.1 which fixes a security vulnerability in the parsing logic of the Range header in FileResponse. Release note: https://github.com/Kludex/starlette/releases/tag/0.49.1 Signed-off-by: Sébastien Han --- pyproject.toml | 1 + uv.lock | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34728d6ea..f6d28fd03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "starlette>=0.49.1", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 0b8b555f6..8f45f0564 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -139,6 +139,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200, upload-time = "2024-11-23T23:39:56.4Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -1037,16 +1046,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.119.0" +version = "0.121.2" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/f9/5c5bcce82a7997cc0eb8c47b7800f862f6b56adc40486ed246e5010d443b/fastapi-0.119.0.tar.gz", hash = "sha256:451082403a2c1f0b99c6bd57c09110ed5463856804c8078d38e5a1f1035dbbb7", size = 336756, upload-time = "2025-10-11T17:13:40.53Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/48/f08f264da34cf160db82c62ffb335e838b1fc16cbcc905f474c7d4c815db/fastapi-0.121.2.tar.gz", hash = "sha256:ca8e932b2b823ec1721c641e3669472c855ad9564a2854c9899d904c2848b8b9", size = 342944, upload-time = "2025-11-13T17:05:54.692Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/70/584c4d7cad80f5e833715c0a29962d7c93b4d18eed522a02981a6d1b6ee5/fastapi-0.119.0-py3-none-any.whl", hash = "sha256:90a2e49ed19515320abb864df570dd766be0662c5d577688f1600170f7f73cf2", size = 107095, upload-time = "2025-10-11T17:13:39.048Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/23/dfb161e91db7c92727db505dc72a384ee79681fe0603f706f9f9f52c2901/fastapi-0.121.2-py3-none-any.whl", hash = "sha256:f2d80b49a86a846b70cc3a03eb5ea6ad2939298bf6a7fe377aa9cd3dd079d358", size = 109201, upload-time = "2025-11-13T17:05:52.718Z" }, ] [[package]] @@ -2110,6 +2120,7 @@ requires-dist = [ { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "starlette" }, + { name = "starlette", specifier = ">=0.49.1" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn", specifier = ">=0.34.0" }, @@ -5060,15 +5071,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.47.2" +version = "0.49.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/57/d062573f391d062710d4088fa1369428c38d51460ab6fedff920efef932e/starlette-0.47.2.tar.gz", hash = "sha256:6ae9aa5db235e4846decc1e7b79c4f346adf41e9777aebeb49dfd09bbd7023d8", size = 2583948, upload-time = "2025-07-20T17:31:58.522Z" } +sdist = { url = "https://files.pythonhosted.org/packages/de/1a/608df0b10b53b0beb96a37854ee05864d182ddd4b1156a22f1ad3860425a/starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284", size = 2655031, upload-time = "2025-11-01T15:12:26.13Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/1f/b876b1f83aef204198a42dc101613fefccb32258e5428b5f9259677864b4/starlette-0.47.2-py3-none-any.whl", hash = "sha256:c5847e96134e5c5371ee9fac6fdf1a67336d5815e09eb2a01fdb57a351ef915b", size = 72984, upload-time = "2025-07-20T17:31:56.738Z" }, + { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" }, ] [[package]] From eb545034ab2a7d4273fdf54e841b69cb33e45d6f Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Fri, 14 Nov 2025 08:54:42 -0800 Subject: [PATCH 30/62] fix: MCP authorization parameter implementation (#4052) # What does this PR do? Adding a user-facing `authorization ` parameter to MCP tool definitions that allows users to explicitly configure credentials per MCP server, addressing GitHub Issue #4034 in a secure manner. 
## Test Plan tests/integration/responses/test_mcp_authentication.py --------- Co-authored-by: Omar Abdelwahab Co-authored-by: Ashwin Bharambe --- client-sdks/stainless/openapi.yml | 15 + docs/static/llama-stack-spec.yaml | 15 + docs/static/stainless-llama-stack-spec.yaml | 15 + src/llama_stack/core/routers/tool_runtime.py | 8 +- .../core/routing_tables/toolgroups.py | 12 +- .../responses/openai_responses.py | 13 + .../meta_reference/responses/streaming.py | 4 +- .../meta_reference/responses/tool_executor.py | 4 +- .../inline/tool_runtime/rag/memory.py | 9 +- .../tool_runtime/bing_search/bing_search.py | 9 +- .../tool_runtime/brave_search/brave_search.py | 9 +- .../model_context_protocol/config.py | 10 +- .../model_context_protocol.py | 72 +- .../tavily_search/tavily_search.py | 9 +- .../wolfram_alpha/wolfram_alpha.py | 9 +- src/llama_stack/providers/utils/tools/mcp.py | 84 ++- src/llama_stack/testing/api_recorder.py | 12 +- src/llama_stack_api/openai_responses.py | 2 + src/llama_stack_api/tools.py | 14 +- .../inference/test_tools_with_schemas.py | 9 +- ...30294237eb43063c00efc83b8a1202c1cc20c.json | 614 ++++++++++++++++++ ...4866a73cc04ce93db40346beb070f30fafee1.json | 614 ++++++++++++++++++ ...d0532e8f5b9418b22e5f874afff695601da16.json | 574 ++++++++++++++++ ...7dc01025aeb2ee6203ef478133313e0a0e250.json | 614 ++++++++++++++++++ ...b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json | 574 ++++++++++++++++ ...ea14cd2869c77972c33e66d9b42438e2165cd.json | 574 ++++++++++++++++ ...b610b38555bb86f93c507ede8752af47cda6a.json | 574 ++++++++++++++++ ...9b84bf814950e3c8f11eed7ed9f11d4462237.json | 614 ++++++++++++++++++ .../responses/test_conversation_responses.py | 1 + .../responses/test_mcp_authentication.py | 105 +++ .../responses/test_tool_responses.py | 2 +- tests/integration/tool_runtime/test_mcp.py | 10 +- .../tool_runtime/test_mcp_json_schema.py | 61 +- .../routers/test_routing_tables.py | 2 +- 34 files changed, 5205 insertions(+), 62 deletions(-) create mode 100644 tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json create mode 100644 tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json create mode 100644 tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json create mode 100644 tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json create mode 100644 tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json create mode 100644 tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json create mode 100644 tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json create mode 100644 tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json create mode 100644 tests/integration/responses/test_mcp_authentication.py diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 65a255c17..d0813de4d 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2054,6 +2054,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. 
+ required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -7123,6 +7130,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -9307,6 +9318,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. additionalProperties: false required: - tool_name diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 66eda78c7..759c7501a 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1878,6 +1878,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. + required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -6182,6 +6189,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -8366,6 +8377,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. additionalProperties: false required: - tool_name diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 65a255c17..d0813de4d 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2054,6 +2054,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. + required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -7123,6 +7130,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -9307,6 +9318,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. 
additionalProperties: false required: - tool_name diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index eccc05732..b387cb657 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -34,16 +34,16 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("ToolRuntimeRouter.shutdown") pass - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None) -> Any: logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") provider = await self.routing_table.get_provider_impl(tool_name) return await provider.invoke_tool( tool_name=tool_name, kwargs=kwargs, + authorization=authorization, ) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None, authorization: str | None = None ) -> ListToolDefsResponse: - logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}") - return await self.routing_table.list_tools(tool_group_id) + return await self.routing_table.list_tools(tool_group_id, authorization=authorization) diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py index 7e2068608..8676ce35e 100644 --- a/src/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -49,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): routing_key = self.tool_to_toolgroup[routing_key] return await super().get_provider_impl(routing_key, provider_id) - async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse: + async def list_tools( + self, toolgroup_id: str | None = None, authorization: str | None = None + ) -> ListToolDefsResponse: if toolgroup_id: if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id): toolgroup_id = group_id @@ -61,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): for toolgroup in toolgroups: if toolgroup.identifier not in self.toolgroups_to_tools: try: - await self._index_tools(toolgroup) + await self._index_tools(toolgroup, authorization=authorization) except AuthenticationRequiredError: # Send authentication errors back to the client so it knows # that it needs to supply credentials for remote MCP servers. 
@@ -76,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return ListToolDefsResponse(data=all_tools) - async def _index_tools(self, toolgroup: ToolGroup): + async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None): provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id) - tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint) + tooldefs_response = await provider_impl.list_runtime_tools( + toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization + ) tooldefs = tooldefs_response.data for t in tooldefs: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 3f88b1562..cb0fe284e 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -257,6 +257,19 @@ class OpenAIResponsesImpl: stream = bool(stream) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text + # Validate MCP tools: ensure Authorization header is not passed via headers dict + if tools: + from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP + + for tool in tools: + if isinstance(tool, OpenAIResponseInputToolMCP) and tool.headers: + for key in tool.headers.keys(): + if key.lower() == "authorization": + raise ValueError( + "Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] if conversation is not None: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index ea4486b62..c0b62958f 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -1091,10 +1091,12 @@ class StreamingResponseOrchestrator: "server_url": mcp_tool.server_url, "mcp_list_tools_id": list_id, } + # List MCP tools with authorization from tool config async with tracing.span("list_mcp_tools", attributes): tool_defs = await list_mcp_tools( endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, + headers=mcp_tool.headers, + authorization=mcp_tool.authorization, ) # Create the MCP list tools message diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 616ec2477..4f294a979 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -296,12 +296,14 @@ class ToolExecutor: "server_url": mcp_tool.server_url, "tool_name": function_name, } + # Invoke MCP tool with authorization from tool config async with tracing.span("invoke_mcp_tool", attributes): result = await invoke_mcp_tool( endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, tool_name=function_name, kwargs=tool_kwargs, + headers=mcp_tool.headers, + authorization=mcp_tool.authorization, ) elif function_name == "knowledge_search": response_file_search_tool = ( diff --git 
a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py index 895d219bb..afb54a8a9 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -276,7 +276,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): ) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: # Parameters are not listed since these methods are not yet invoked automatically # by the LLM. The method is only implemented so things like /tools can list without @@ -304,7 +307,9 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: vector_store_ids = kwargs.get("vector_store_ids", []) query_config = kwargs.get("query_config") if query_config: diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index a5a53a9eb..77c5a3bf7 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq return provider_data.bing_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() headers = { "Ocp-Apim-Subscription-Key": api_key, diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 4888730e4..1f49671cf 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe return provider_data.brave_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = 
self._get_api_key() url = "https://api.search.brave.com/res/v1/web/search" headers = { diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py index b8c5e77fd..9acabfc34 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py @@ -10,8 +10,14 @@ from pydantic import BaseModel class MCPProviderDataValidator(BaseModel): - # mcp_endpoint => dict of headers to send - mcp_headers: dict[str, dict[str, str]] | None = None + """ + Validator for MCP provider-specific data passed via request headers. + + Phase 1: Support old header-based authentication for backward compatibility. + In Phase 2, this will be deprecated in favor of the authorization parameter. + """ + + mcp_headers: dict[str, dict[str, str]] | None = None # Map of URI -> headers dict class MCPProviderConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 544597a51..649bddecb 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -39,15 +39,29 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime return async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: # this endpoint should be retrieved by getting the tool group right? 
if mcp_endpoint is None: raise ValueError("mcp_endpoint is required") - headers = await self.get_headers_from_request(mcp_endpoint.uri) - return await list_mcp_tools(mcp_endpoint.uri, headers) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + # Phase 1: Support both old header-based auth AND new authorization parameter + # Get headers and auth from provider data (old approach) + provider_headers, provider_auth = await self.get_headers_from_request(mcp_endpoint.uri) + + # New authorization parameter takes precedence over provider data + final_authorization = authorization or provider_auth + + return await list_mcp_tools( + endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=final_authorization + ) + + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: tool = await self.tool_store.get_tool(tool_name) if tool.metadata is None or tool.metadata.get("endpoint") is None: raise ValueError(f"Tool {tool_name} does not have metadata") @@ -55,19 +69,57 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime if urlparse(endpoint).scheme not in ("http", "https"): raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") - headers = await self.get_headers_from_request(endpoint) - return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs) + # Phase 1: Support both old header-based auth AND new authorization parameter + # Get headers and auth from provider data (old approach) + provider_headers, provider_auth = await self.get_headers_from_request(endpoint) + + # New authorization parameter takes precedence over provider data + final_authorization = authorization or provider_auth + + return await invoke_mcp_tool( + endpoint=endpoint, + tool_name=tool_name, + kwargs=kwargs, + headers=provider_headers, + authorization=final_authorization, + ) + + async def get_headers_from_request(self, mcp_endpoint_uri: str) -> tuple[dict[str, str], str | None]: + """ + Extract headers and authorization from request provider data (Phase 1 backward compatibility). + + Phase 1: Temporarily allows Authorization to be passed via mcp_headers for backward compatibility. + Phase 2: Will enforce that Authorization should use the dedicated authorization parameter instead. 
+ + Returns: + Tuple of (headers_dict, authorization_token) + - headers_dict: All headers except Authorization + - authorization_token: Token from Authorization header (with "Bearer " prefix removed), or None + """ - async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: def canonicalize_uri(uri: str) -> str: return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" headers = {} + authorization = None provider_data = self.get_request_provider_data() - if provider_data and provider_data.mcp_headers: + if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers: for uri, values in provider_data.mcp_headers.items(): if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): continue - headers.update(values) - return headers + + # Phase 1: Extract Authorization from mcp_headers for backward compatibility + # (Phase 2 will reject this and require the dedicated authorization parameter) + for key in values.keys(): + if key.lower() == "authorization": + # Extract authorization token and strip "Bearer " prefix if present + auth_value = values[key] + if auth_value.startswith("Bearer "): + authorization = auth_value[7:] # Remove "Bearer " prefix + else: + authorization = auth_value + else: + headers[key] = values[key] + + return headers, authorization diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d86cf5d8e..e12b41885 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.tavily_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() async with httpx.AsyncClient() as client: response = await client.post( diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index f8d806a5c..68f0ebaef 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.wolfram_alpha_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + 
async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() params = { "input": kwargs["query"], diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index fad1bf0f0..9c5e9cd96 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -30,6 +30,40 @@ from llama_stack_api import ( logger = get_logger(__name__, category="tools") + +def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]: + """ + Prepare headers for MCP requests with authorization support. + + Args: + base_headers: Base headers dictionary (can be None) + authorization: OAuth access token (without "Bearer " prefix) + + Returns: + Headers dictionary with Authorization header if token provided + + Raises: + ValueError: If Authorization header is specified in the headers dict (security risk) + """ + headers = dict(base_headers or {}) + + # Security check: reject any Authorization header in the headers dict + # Users must use the authorization parameter instead to avoid security risks + existing_keys_lower = {k.lower() for k in headers.keys()} + if "authorization" in existing_keys_lower: + raise ValueError( + "For security reasons, Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + + # Add Authorization header if token provided + if authorization: + # OAuth access token - add "Bearer " prefix + headers["Authorization"] = f"Bearer {authorization}" + + return headers + + protocol_cache = TTLDict(ttl_seconds=3600) @@ -112,9 +146,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat raise -async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse: +async def list_mcp_tools( + endpoint: str, + headers: dict[str, str] | None = None, + authorization: str | None = None, +) -> ListToolDefsResponse: + """List tools available from an MCP server. + + Args: + endpoint: MCP server endpoint URL + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + List of tool definitions from the MCP server + + Raises: + ValueError: If Authorization is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + tools = [] - async with client_wrapper(endpoint, headers) as session: + async with client_wrapper(endpoint, final_headers) as session: tools_result = await session.list_tools() for tool in tools_result.tools: tools.append( @@ -132,9 +186,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs async def invoke_mcp_tool( - endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any] + endpoint: str, + tool_name: str, + kwargs: dict[str, Any], + headers: dict[str, str] | None = None, + authorization: str | None = None, ) -> ToolInvocationResult: - async with client_wrapper(endpoint, headers) as session: + """Invoke an MCP tool with the given arguments. 
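+
+    A minimal usage sketch (endpoint, tool name, and token are hypothetical):
+
+        result = await invoke_mcp_tool(
+            endpoint="http://localhost:8000/sse",
+            tool_name="get_boiling_point",
+            kwargs={"liquid_name": "water"},
+            authorization="abc123",  # sent as "Authorization: Bearer abc123"
+        )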
+ + Args: + endpoint: MCP server endpoint URL + tool_name: Name of the tool to invoke + kwargs: Tool invocation arguments + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + Tool invocation result with content and error information + + Raises: + ValueError: If Authorization header is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + + async with client_wrapper(endpoint, final_headers) as session: result = await session.call_tool(tool_name, kwargs) content: list[InterleavedContentItem] = [] diff --git a/src/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py index f46f07458..a7ad582f3 100644 --- a/src/llama_stack/testing/api_recorder.py +++ b/src/llama_stack/testing/api_recorder.py @@ -609,14 +609,14 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) async def _patched_tool_invoke_method( - original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any] + original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None ): """Patched version of tool runtime invoke_tool method for recording/replay.""" global _current_mode, _current_storage if _current_mode == APIRecordingMode.LIVE or _current_storage is None: # Normal operation - return await original_method(self, tool_name, kwargs) + return await original_method(self, tool_name, kwargs, authorization=authorization) request_hash = normalize_tool_request(provider_name, tool_name, kwargs) @@ -634,7 +634,7 @@ async def _patched_tool_invoke_method( if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING): # Make the tool call and record it - result = await original_method(self, tool_name, kwargs) + result = await original_method(self, tool_name, kwargs, authorization=authorization) request_data = { "test_id": get_test_context(), @@ -885,9 +885,11 @@ def patch_inference_clients(): OllamaAsyncClient.list = patched_ollama_list # Create patched methods for tool runtimes - async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]): + async def patched_tavily_invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ): return await _patched_tool_invoke_method( - _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs + _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization ) # Apply tool runtime patches diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index 70139a98a..2dd73e90a 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -490,6 +490,7 @@ class OpenAIResponseInputToolMCP(BaseModel): :param server_label: Label to identify this MCP server :param server_url: URL endpoint of the MCP server :param headers: (Optional) HTTP headers to include when connecting to the server + :param authorization: (Optional) OAuth access token for authenticating with the MCP server :param require_approval: Approval requirement for tool calls ("always", "never", or filter) :param allowed_tools: (Optional) Restriction on which tools can be used from this server """ @@ -498,6 +499,7 @@ class OpenAIResponseInputToolMCP(BaseModel): server_label: str server_url: str headers: dict[str, Any] | None = None + authorization: str | 
None = Field(default=None, exclude=True) require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never" allowed_tools: list[str] | AllowedToolsFilter | None = None diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py index 6571c2047..81c989f88 100644 --- a/src/llama_stack_api/tools.py +++ b/src/llama_stack_api/tools.py @@ -196,22 +196,32 @@ class ToolRuntime(Protocol): # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: """List all tools in the runtime. :param tool_group_id: The ID of the tool group to list tools for. :param mcp_endpoint: The MCP endpoint to use for the tool group. + :param authorization: (Optional) OAuth access token for authenticating with the MCP server. :returns: A ListToolDefsResponse. """ ... @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, + tool_name: str, + kwargs: dict[str, Any], + authorization: str | None = None, + ) -> ToolInvocationResult: """Run a tool with the given arguments. :param tool_name: The name of the tool to invoke. :param kwargs: A dictionary of arguments to pass to the tool. + :param authorization: (Optional) OAuth access token for authenticating with the MCP server. :returns: A ToolInvocationResult. """ ... diff --git a/tests/integration/inference/test_tools_with_schemas.py b/tests/integration/inference/test_tools_with_schemas.py index f30e9ece5..5b6e69ae3 100644 --- a/tests/integration/inference/test_tools_with_schemas.py +++ b/tests/integration/inference/test_tools_with_schemas.py @@ -193,7 +193,14 @@ class TestMCPToolsInChatCompletion: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } diff --git a/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json b/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json new file mode 100644 index 000000000..464de788f --- /dev/null +++ b/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "c5g42LQpiBwmVH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MEmQFjCKEsNDL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dF3UemYO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ENDOmjG37D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6kb5u2d4ILV59" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Y6Dp6rbT9OdBG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EN0ShAkdxF2jIs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1NHavCOT2fSI63" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VTwbnRFtKY2W" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VJuNhLeGK43e6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + 
"content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bFgxcYCjU42I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5KR4mGTP0Rpu0O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KCeY3i4Qo9L1j" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GgtT2kqCUk8jGH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "H3E18AkuuATh3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5kuUoomGw6aPf0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CKIiDxWMV3zzcNj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9KZoS4rawE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "iq2ecCxqopvPO" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json b/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json new file mode 100644 index 000000000..66c87e3bb --- /dev/null +++ b/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "veiGKPHTdRNcOX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "u9RK8eZYDguJs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "U0L1RjHF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TMS6QVLJfj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5zokjwZ0nBNlD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CmOp3DQRu0AqZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OlnZU0jlGyE2mD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PGCsCfw8zUqRAj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8P65fJ4x3QVF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HVTNGb62o54Ol" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + 
"content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bdRgQioKQZM6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5djjyePEzwsPID" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xoN3TaCEum6A9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UmU8LCL6WJIDrf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FFXxvyme7JKyc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8BpDPmgFmIBJQQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Mey7rwshfBQbVlP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IXaz4vn8As" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9ebnd6bFXcdOY" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json b/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json new file mode 100644 index 000000000..bacefe818 --- /dev/null +++ b/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bKe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kxw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cKkF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "md" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "o" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nRfv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1M8i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "R2Q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MDi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7KwE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9IipvPESur5Y7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json b/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json new file mode 100644 index 000000000..7ab319fb8 --- /dev/null +++ b/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json @@ -0,0 +1,614 @@ +{ + "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_2lYntxgdJV66JFvD6OuICQCB", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_2lYntxgdJV66JFvD6OuICQCB", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BNpFmbWkpYEjZX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HdnyHcq2CLvjn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "gOMuwgrp" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OTfqq7Yggw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cwJMhZJyf5PIp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "54NR7IGiuBTw5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "q1x9cVVPTflQti" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vcudLe3yaadkvB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "uql1pBt4elRL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M2kzUEkJctjYp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Waet2ux2zs9P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KjbjxdGYUZDuiI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Fg8IXJhJv8iAI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wiAqPLAoinVhQq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vJnb9sE969jph" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5Hgi5CU0aV0sPw" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RDfKhuQo4E4TLXU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oN1EYVkDbW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "OfhOTT3VdJ2s7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json b/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json new file mode 100644 index 000000000..c2c8bbd80 --- /dev/null +++ b/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_2lYntxgdJV66JFvD6OuICQCB", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UmB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ejb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Loxj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "G" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lo9p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "YWPA" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "e0t" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h2F" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "B9QY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "MH88zIptmy2Xs" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json b/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json new file mode 100644 index 000000000..37a29324e --- /dev/null +++ b/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json @@ -0,0 +1,574 @@ +{ + "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5Y1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QzQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4NPm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Lh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "r" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "w" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GSVa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AWZm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1Bw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Oq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cI8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kKqh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "etTUytEvlkJ99" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json 
b/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json new file mode 100644 index 000000000..e98f64b93 --- /dev/null +++ b/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TC0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hDL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4G8Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ow" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yhAk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SdIN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nEC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DoL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cSRf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + 
"prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ejlSF0NzXFFso" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json b/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json new file mode 100644 index 000000000..67c78f3ed --- /dev/null +++ b/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Usdowqbd6beiYB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": null, + "obfuscation": "nVevItSH27TBR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HWyYtVAl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kvvcut6Eib" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "E0osAbGBpCPvy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GmH7m44fmv0Mk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oJ4DV7z5GiqJqX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8AmNNAYPXMNrEr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": 
null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JEzK8X8AD9hP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8EGj5LyQzpZMt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wQG19uBuvC7j" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8Wyenb7E997f9E" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SVXiel7RHA6f3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ynScunJEjmOWBo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": 
"po2PLlPavc9TN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "mt2jiL22pWkH93" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "32gJJ61zmjmftOn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HszNIiCJ12" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "cAx3IDg7toBDJ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index bbd861e0d..ce249f6a0 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ b/tests/integration/responses/test_conversation_responses.py @@ -88,6 +88,7 @@ class TestConversationResponses: assert "apple" in response.output_text.lower() + @pytest.mark.timeout(60, method="thread") def test_conversation_error_handling(self, openai_client, text_model_id): """Test error handling for invalid and nonexistent conversations.""" # Invalid conversation ID format diff --git a/tests/integration/responses/test_mcp_authentication.py b/tests/integration/responses/test_mcp_authentication.py new file mode 100644 index 000000000..5c990ff6a --- /dev/null +++ b/tests/integration/responses/test_mcp_authentication.py @@ -0,0 +1,105 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +import pytest + +from tests.common.mcp import make_mcp_server + +from .helpers import setup_mcp_tools + +# MCP authentication tests with recordings +# Tests for bearer token authorization support in MCP tool configurations + + +def test_mcp_authorization_bearer(responses_client, text_model_id): + """Test that bearer authorization is correctly applied to MCP requests.""" + test_token = "test-bearer-token-789" + with make_mcp_server(required_auth_token=test_token) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "auth-mcp", + "server_url": "", + "authorization": test_token, # Just the token, not "Bearer " + } + ], + mcp_server_info, + ) + + # Create response - authorization should be applied + response = responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + # Verify list_tools succeeded (requires auth) + assert len(response.output) >= 3 + assert response.output[0].type == "mcp_list_tools" + assert len(response.output[0].tools) == 2 + + # Verify tool invocation succeeded (requires auth) + assert response.output[1].type == "mcp_call" + assert response.output[1].error is None + + +def test_mcp_authorization_error_when_header_provided(responses_client, text_model_id): + """Test that providing Authorization in headers raises a security error.""" + test_token = "test-token-123" + with make_mcp_server(required_auth_token=test_token) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "header-auth-mcp", + "server_url": "", + "headers": {"Authorization": f"Bearer {test_token}"}, # Security risk - should be rejected + } + ], + mcp_server_info, + ) + + # Create response - should raise BadRequestError for security reasons + with pytest.raises((ValueError, Exception), match="Authorization header cannot be passed via 'headers'"): + responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + +def test_mcp_authorization_backward_compatibility(responses_client, text_model_id): + """Test that MCP tools work without authorization (backward compatibility).""" + # No authorization required + with make_mcp_server(required_auth_token=None) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "noauth-mcp", + "server_url": "", + } + ], + mcp_server_info, + ) + + # Create response without authorization + response = responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + # Verify operations succeeded without auth + assert len(response.output) >= 3 + assert response.output[0].type == "mcp_list_tools" + assert response.output[1].type == "mcp_call" + assert response.output[1].error is None diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 2c7c7ef34..742d45f8b 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -249,7 +249,7 @@ def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, for tool in tools: if tool["type"] == "mcp": - tool["headers"] = {"Authorization": "Bearer test-token"} + tool["authorization"] = "test-token" response = responses_client.responses.create( model=text_model_id, diff --git a/tests/integration/tool_runtime/test_mcp.py 
b/tests/integration/tool_runtime/test_mcp.py index 9ce0d1c98..1b7f509d2 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -37,6 +37,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): mcp_endpoint=dict(uri=uri), ) + # Use old header-based approach for Phase 1 (backward compatibility) provider_data = { "mcp_headers": { uri: { @@ -53,7 +54,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): tools_list = llama_stack_client.tools.list( toolgroup_id=test_toolgroup_id, - extra_headers=auth_headers, + extra_headers=auth_headers, # Use old header-based approach ) assert len(tools_list) == 2 assert {t.name for t in tools_list} == {"greet_everyone", "get_boiling_point"} @@ -61,7 +62,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): response = llama_stack_client.tool_runtime.invoke_tool( tool_name="greet_everyone", kwargs=dict(url="https://www.google.com"), - extra_headers=auth_headers, + extra_headers=auth_headers, # Use old header-based approach ) content = response.content assert len(content) == 1 @@ -76,9 +77,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): "server_label": test_toolgroup_id, "require_approval": "never", "allowed_tools": [tool.name for tool in tools_list], - "headers": { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, + "authorization": AUTH_TOKEN, } ] agent = Agent( @@ -104,7 +103,6 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): } ], stream=True, - extra_headers=auth_headers, ) ) events = [chunk.event for chunk in chunks] diff --git a/tests/integration/tool_runtime/test_mcp_json_schema.py b/tests/integration/tool_runtime/test_mcp_json_schema.py index def0b27b8..719588c7f 100644 --- a/tests/integration/tool_runtime/test_mcp_json_schema.py +++ b/tests/integration/tool_runtime/test_mcp_json_schema.py @@ -4,8 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -""" -Integration tests for MCP tools with complex JSON Schema support. +"""Integration tests for MCP tools with complex JSON Schema support. Tests $ref, $defs, and other JSON Schema features through MCP integration. 
""" @@ -123,7 +122,14 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -166,7 +172,15 @@ class TestMCPSchemaPreservation: provider_id="model-context-protocol", mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -216,7 +230,14 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -263,7 +284,14 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -309,7 +337,14 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -365,7 +400,14 @@ class TestAgentWithMCPTools: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -381,6 +423,7 @@ class TestAgentWithMCPTools: "server_label": test_toolgroup_id, "require_approval": "never", "allowed_tools": [tool.name for tool in tools_list], + "authorization": AUTH_TOKEN, } ] @@ -389,7 +432,6 @@ class TestAgentWithMCPTools: model=text_model_id, instructions="You are a helpful assistant that can process orders and book flights.", tools=tool_defs, - extra_headers=auth_headers, ) session_id = agent.create_session("test-session-complex") @@ -411,7 +453,6 @@ class TestAgentWithMCPTools: } ], stream=True, - extra_headers=auth_headers, ) ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 8fd9d6ec3..292ee8384 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -137,7 +137,7 @@ class ToolGroupsImpl(Impl): async def unregister_toolgroup(self, toolgroup_id: str): return toolgroup_id - async def 
list_runtime_tools(self, toolgroup_id, mcp_endpoint): + async def list_runtime_tools(self, toolgroup_id, mcp_endpoint, authorization=None): return ListToolDefsResponse( data=[ ToolDef( From f596f850bf7e5213e16eb494d91ca77e6480efd2 Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Fri, 14 Nov 2025 13:14:49 -0800 Subject: [PATCH 31/62] fix: Propagate the runtime error message to user (#4150) # What does this PR do? For runtime exceptions, the error message is not propagated to the user and the response can be opaque. Before fix: `ERROR - Error processing message: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'} ` After fix: `[ERROR] Error code: 404 - {'detail': "Model 'claude-sonnet-4-5-20250929' not found. Use 'client.models.list()' to list available Models."} ` (I ran into this a few times while working on OCI + Llama Stack and Sabre agentic-framework integrations with Llama Stack.) ## Test Plan CI --- .../inline/agents/meta_reference/responses/streaming.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index c0b62958f..95c690147 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -16,6 +16,7 @@ from llama_stack_api import ( ApprovalFilter, Inference, MCPListToolsTool, + ModelNotFoundError, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -323,6 +324,8 @@ class StreamingResponseOrchestrator: if last_completion_result and last_completion_result.finish_reason == "length": final_status = "incomplete" + except ModelNotFoundError: + raise except Exception as exc: # noqa: BLE001 self.final_messages = messages.copy() self.sequence_number += 1 From cc8878907118878d316fb4ae690d974a9fd01434 Mon Sep 17 00:00:00 2001 From: Mike Sager <53917070+msager27@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:16:03 -0500 Subject: [PATCH 32/62] test: Restore responses unit tests (#4153) # What does this PR do? Restores the responses unit tests that were inadvertently deleted in PR [#4055](https://github.com/llamastack/llama-stack/pull/4055). ## Test Plan I ran the unit tests that I restored. They all passed with one exception: tests/unit/providers/agents/meta_reference/test_openai_responses.py::test_reuse_mcp_tool_list AttributeError: module 'llama_stack.providers.utils.tools' has no attribute 'mcp' It's coming from this line: @patch("llama_stack.providers.utils.tools.mcp.list_mcp_tools") The mcp.py module (and \_\_init\_\_.py) exists under tools. It contains some `from mcp ...` imports (referring to the external mcp package), which Python may be resolving as circular imports (or maybe I'm overlooking something).
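For context on that failure: `unittest.mock.patch` resolves a dotted target by importing the top-level package and walking attributes, importing each submodule only as a fallback when an attribute lookup fails. If that fallback import of `llama_stack.providers.utils.tools.mcp` hits a circular import, the half-initialized submodule is already in `sys.modules`, so the import returns without binding `mcp` on the parent package and the retried lookup raises exactly this AttributeError. A minimal diagnostic sketch (hypothetical, not part of this PR; it assumes only the module path from the error message):

```python
# Hypothetical diagnostic: importing the submodule directly, before any
# patching, surfaces the underlying import error with a full traceback
# instead of the opaque AttributeError from mock.patch's target resolution.
import importlib

mod = importlib.import_module("llama_stack.providers.utils.tools.mcp")
print(hasattr(mod, "list_mcp_tools"))  # True once the module initializes cleanly
```

If that import works in isolation, another option is to patch the name where the code under test looks it up, rather than through the `tools` package path.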
--- .../meta_reference/fixtures/__init__.py | 23 + .../fixtures/simple_chat_completion.yaml | 9 + .../fixtures/tool_call_completion.yaml | 14 + .../meta_reference/test_openai_responses.py | 1244 +++++++++++++++++ .../test_openai_responses_conversations.py | 249 ++++ .../test_response_conversion_utils.py | 367 +++++ .../test_response_tool_context.py | 183 +++ .../test_responses_safety_utils.py | 155 ++ 8 files changed, 2244 insertions(+) create mode 100644 tests/unit/providers/agents/meta_reference/fixtures/__init__.py create mode 100644 tests/unit/providers/agents/meta_reference/fixtures/simple_chat_completion.yaml create mode 100644 tests/unit/providers/agents/meta_reference/fixtures/tool_call_completion.yaml create mode 100644 tests/unit/providers/agents/meta_reference/test_openai_responses.py create mode 100644 tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py create mode 100644 tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py create mode 100644 tests/unit/providers/agents/meta_reference/test_response_tool_context.py create mode 100644 tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py new file mode 100644 index 000000000..130c46f6d --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os + +import yaml + +from llama_stack_api.inference import ( + OpenAIChatCompletion, +) + +FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion: + fixture_path = os.path.join(FIXTURES_DIR, filename) + + with open(fixture_path) as f: + data = yaml.safe_load(f) + return OpenAIChatCompletion(**data) diff --git a/tests/unit/providers/agents/meta_reference/fixtures/simple_chat_completion.yaml b/tests/unit/providers/agents/meta_reference/fixtures/simple_chat_completion.yaml new file mode 100644 index 000000000..4959349a0 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/fixtures/simple_chat_completion.yaml @@ -0,0 +1,9 @@ +id: chat-completion-123 +choices: + - message: + content: "Dublin" + role: assistant + finish_reason: stop + index: 0 +created: 1234567890 +model: meta-llama/Llama-3.1-8B-Instruct diff --git a/tests/unit/providers/agents/meta_reference/fixtures/tool_call_completion.yaml b/tests/unit/providers/agents/meta_reference/fixtures/tool_call_completion.yaml new file mode 100644 index 000000000..f6532e3a9 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/fixtures/tool_call_completion.yaml @@ -0,0 +1,14 @@ +id: chat-completion-123 +choices: + - message: + tool_calls: + - id: tool_call_123 + type: function + function: + name: web_search + arguments: '{"query":"What is the capital of Ireland?"}' + role: assistant + finish_reason: stop + index: 0 +created: 1234567890 +model: meta-llama/Llama-3.1-8B-Instruct diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py new file mode 100644 index 000000000..78f0d7cfd --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -0,0 +1,1244 @@ +# Copyright 
(c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import AsyncMock, patch + +import pytest +from openai.types.chat.chat_completion_chunk import ( + ChatCompletionChunk, + Choice, + ChoiceDelta, + ChoiceDeltaToolCall, + ChoiceDeltaToolCallFunction, +) + +from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig +from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( + OpenAIResponsesImpl, +) +from llama_stack.providers.utils.responses.responses_store import ( + ResponsesStore, + _OpenAIResponseObjectWithInputAndMessages, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api.agents import Order +from llama_stack_api.inference import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIDeveloperMessageParam, + OpenAIJSONSchema, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIUserMessageParam, +) +from llama_stack_api.openai_responses import ( + ListOpenAIResponseInputItem, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMessage, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponseText, + OpenAIResponseTextFormat, + WebSearchToolTypes, +) +from llama_stack_api.tools import ListToolDefsResponse, ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime +from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture + + +@pytest.fixture +def mock_inference_api(): + inference_api = AsyncMock() + return inference_api + + +@pytest.fixture +def mock_tool_groups_api(): + tool_groups_api = AsyncMock(spec=ToolGroups) + return tool_groups_api + + +@pytest.fixture +def mock_tool_runtime_api(): + tool_runtime_api = AsyncMock(spec=ToolRuntime) + return tool_runtime_api + + +@pytest.fixture +def mock_responses_store(): + responses_store = AsyncMock(spec=ResponsesStore) + return responses_store + + +@pytest.fixture +def mock_vector_io_api(): + vector_io_api = AsyncMock() + return vector_io_api + + +@pytest.fixture +def mock_conversations_api(): + """Mock conversations API for testing.""" + mock_api = AsyncMock() + return mock_api + + +@pytest.fixture +def mock_safety_api(): + safety_api = AsyncMock() + return safety_api + + +@pytest.fixture +def openai_responses_impl( + mock_inference_api, + mock_tool_groups_api, + mock_tool_runtime_api, + mock_responses_store, + mock_vector_io_api, + mock_safety_api, + mock_conversations_api, +): + return OpenAIResponsesImpl( + inference_api=mock_inference_api, + tool_groups_api=mock_tool_groups_api, + tool_runtime_api=mock_tool_runtime_api, + responses_store=mock_responses_store, + vector_io_api=mock_vector_io_api, + safety_api=mock_safety_api, + conversations_api=mock_conversations_api, + ) + + +async def fake_stream(fixture: str = "simple_chat_completion.yaml"): + value = load_chat_completion_fixture(fixture) + yield ChatCompletionChunk( + id=value.id, + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + 
content=c.message.content, + role=c.message.role, + tool_calls=[ + ChoiceDeltaToolCall( + index=0, + id=t.id, + function=ChoiceDeltaToolCallFunction( + name=t.function.name, + arguments=t.function.arguments, + ), + ) + for t in (c.message.tool_calls or []) + ], + ), + ) + for c in value.choices + ], + created=1, + model=value.model, + object="chat.completion.chunk", + ) + + +async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api): + """Test creating an OpenAI response with a simple string input.""" + # Setup + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + + # Load the chat completion fixture + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + stream=True, # Enable streaming to test content part events + ) + + # For streaming response, collect all chunks + chunks = [chunk async for chunk in result] + + mock_inference_api.openai_chat_completion.assert_called_once_with( + OpenAIChatCompletionRequestWithExtraBody( + model=model, + messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], + response_format=None, + tools=None, + stream=True, + temperature=0.1, + stream_options={ + "include_usage": True, + }, + ) + ) + + # Should have content part events for text streaming + # Expected: response.created, response.in_progress, content_part.added, output_text.delta, content_part.done, response.completed + assert len(chunks) >= 5 + assert chunks[0].type == "response.created" + assert any(chunk.type == "response.in_progress" for chunk in chunks) + + # Check for content part events + content_part_added_events = [c for c in chunks if c.type == "response.content_part.added"] + content_part_done_events = [c for c in chunks if c.type == "response.content_part.done"] + text_delta_events = [c for c in chunks if c.type == "response.output_text.delta"] + + assert len(content_part_added_events) >= 1, "Should have content_part.added event for text" + assert len(content_part_done_events) >= 1, "Should have content_part.done event for text" + assert len(text_delta_events) >= 1, "Should have text delta events" + + added_event = content_part_added_events[0] + done_event = content_part_done_events[0] + assert added_event.content_index == 0 + assert done_event.content_index == 0 + assert added_event.output_index == done_event.output_index == 0 + assert added_event.item_id == done_event.item_id + assert added_event.response_id == done_event.response_id + + # Verify final event is completion + assert chunks[-1].type == "response.completed" + + # When streaming, the final response is in the last chunk + final_response = chunks[-1].response + assert final_response.model == model + assert len(final_response.output) == 1 + assert isinstance(final_response.output[0], OpenAIResponseMessage) + assert final_response.output[0].id == added_event.item_id + assert final_response.id == added_event.response_id + + openai_responses_impl.responses_store.store_response_object.assert_called_once() + assert final_response.output[0].content[0].text == "Dublin" + + +async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api): + """Test creating an OpenAI response with a simple string input and tools.""" + # Setup + input_text = "What is the capital of Ireland?" 
+ model = "meta-llama/Llama-3.1-8B-Instruct" + + openai_responses_impl.tool_groups_api.get_tool.return_value = ToolDef( + name="web_search", + toolgroup_id="web_search", + description="Search the web for information", + input_schema={ + "type": "object", + "properties": {"query": {"type": "string", "description": "The query to search for"}}, + "required": ["query"], + }, + ) + + openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult( + status="completed", + content="Dublin", + ) + + # Execute + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() + + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) + + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + first_params = first_call.args[0] + assert first_params.messages[0].content == "What is the capital of Ireland?" + assert first_params.tools is not None + assert first_params.temperature == 0.1 + + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + second_params = second_call.args[0] + assert second_params.messages[-1].content == "Dublin" + assert second_params.temperature == 0.1 + + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) + + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 1 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] + + +async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api): + """Test creating an OpenAI response with a tool call response that has a type of None.""" + # Setup + input_text = "How hot it is in San Francisco today?" 
+ model = "meta-llama/Llama-3.1-8B-Instruct" + + async def fake_stream_toolcall(): + yield ChatCompletionChunk( + id="123", + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + tool_calls=[ + ChoiceDeltaToolCall( + index=0, + id="tc_123", + function=ChoiceDeltaToolCallFunction(name="get_weather", arguments="{}"), + type=None, + ) + ] + ), + ), + ], + created=1, + model=model, + object="chat.completion.chunk", + ) + + mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() + + # Execute + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + stream=True, + temperature=0.1, + tools=[ + OpenAIResponseInputToolFunction( + name="get_weather", + description="Get current temperature for a given location.", + parameters={ + "location": "string", + }, + ) + ], + ) + + # Check that we got the content from our mocked tool execution result + chunks = [chunk async for chunk in result] + + # Verify event types + # Should have: response.created, response.in_progress, output_item.added, + # function_call_arguments.delta, function_call_arguments.done, output_item.done, response.completed + assert len(chunks) == 7 + + event_types = [chunk.type for chunk in chunks] + assert event_types == [ + "response.created", + "response.in_progress", + "response.output_item.added", + "response.function_call_arguments.delta", + "response.function_call_arguments.done", + "response.output_item.done", + "response.completed", + ] + + # Verify inference API was called correctly (after iterating over result) + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + first_params = first_call.args[0] + assert first_params.messages[0].content == input_text + assert first_params.tools is not None + assert first_params.temperature == 0.1 + + # Check response.created event (should have empty output) + assert len(chunks[0].response.output) == 0 + + # Check response.completed event (should have the tool call) + completed_chunk = chunks[-1] + assert completed_chunk.type == "response.completed" + assert len(completed_chunk.response.output) == 1 + assert completed_chunk.response.output[0].type == "function_call" + assert completed_chunk.response.output[0].name == "get_weather" + + +async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api): + """Test creating an OpenAI response with tool calls that omit arguments.""" + + input_text = "What is the time right now?" 
+ model = "meta-llama/Llama-3.1-8B-Instruct" + + async def fake_stream_toolcall(): + yield ChatCompletionChunk( + id="123", + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + tool_calls=[ + ChoiceDeltaToolCall( + index=0, + id="tc_123", + function=ChoiceDeltaToolCallFunction(name="get_current_time", arguments=None), + type=None, + ) + ] + ), + ), + ], + created=1, + model=model, + object="chat.completion.chunk", + ) + + def assert_common_expectations(chunks) -> None: + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + first_params = first_call.args[0] + assert first_params.messages[0].content == input_text + assert first_params.tools is not None + assert first_params.temperature == 0.1 + assert len(chunks[0].response.output) == 0 + completed_chunk = chunks[-1] + assert completed_chunk.type == "response.completed" + assert len(completed_chunk.response.output) == 1 + assert completed_chunk.response.output[0].type == "function_call" + assert completed_chunk.response.output[0].name == "get_current_time" + assert completed_chunk.response.output[0].arguments == "{}" + + # Function does not accept arguments + mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + stream=True, + temperature=0.1, + tools=[ + OpenAIResponseInputToolFunction( + name="get_current_time", description="Get current time for system's timezone", parameters={} + ) + ], + ) + chunks = [chunk async for chunk in result] + assert [chunk.type for chunk in chunks] == [ + "response.created", + "response.in_progress", + "response.output_item.added", + "response.function_call_arguments.done", + "response.output_item.done", + "response.completed", + ] + assert_common_expectations(chunks) + + # Function accepts optional arguments + mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + stream=True, + temperature=0.1, + tools=[ + OpenAIResponseInputToolFunction( + name="get_current_time", + description="Get current time for system's timezone", + parameters={"timezone": "string"}, + ) + ], + ) + chunks = [chunk async for chunk in result] + assert [chunk.type for chunk in chunks] == [ + "response.created", + "response.in_progress", + "response.output_item.added", + "response.function_call_arguments.done", + "response.output_item.done", + "response.completed", + ] + assert_common_expectations(chunks) + + # Function accepts optional arguments with additional optional fields + mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + stream=True, + temperature=0.1, + tools=[ + OpenAIResponseInputToolFunction( + name="get_current_time", + description="Get current time for system's timezone", + parameters={"timezone": "string", "location": "string"}, + ) + ], + ) + chunks = [chunk async for chunk in result] + assert [chunk.type for chunk in chunks] == [ + "response.created", + "response.in_progress", + "response.output_item.added", + "response.function_call_arguments.done", + "response.output_item.done", + "response.completed", + ] + assert_common_expectations(chunks) + mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() + + +async def test_create_openai_response_with_multiple_messages(openai_responses_impl, 
mock_inference_api): + """Test creating an OpenAI response with multiple messages.""" + # Setup + input_messages = [ + OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None), + OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None), + OpenAIResponseMessage( + role="assistant", + content=[ + OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"), + OpenAIResponseInputMessageContentText(text="Dublin"), + ], + name=None, + ), + OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None), + ] + model = "meta-llama/Llama-3.1-8B-Instruct" + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input=input_messages, + model=model, + temperature=0.1, + ) + + # Verify the correct messages were sent to the inference API, i.e. + # all of the response messages were converted to the chat completion message objects + call_args = mock_inference_api.openai_chat_completion.call_args_list[0] + params = call_args.args[0] + inference_messages = params.messages + for i, m in enumerate(input_messages): + if isinstance(m.content, str): + assert inference_messages[i].content == m.content + else: + assert inference_messages[i].content[0].text == m.content[0].text + assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam) + assert inference_messages[i].role == m.role + if m.role == "user": + assert isinstance(inference_messages[i], OpenAIUserMessageParam) + elif m.role == "assistant": + assert isinstance(inference_messages[i], OpenAIAssistantMessageParam) + else: + assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam) + + +async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store): + """Test prepending a basic previous response to a new response.""" + + input_item_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")], + role="user", + ) + response_output_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")], + status="completed", + role="assistant", + ) + previous_response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[response_output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[OpenAIUserMessageParam(content="fake_previous_input")], + ) + mock_responses_store.get_response_object.return_value = previous_response + + input = await openai_responses_impl._prepend_previous_response("fake_input", previous_response) + + assert len(input) == 3 + # Check for previous input + assert isinstance(input[0], OpenAIResponseMessage) + assert input[0].content[0].text == "fake_previous_input" + # Check for previous output + assert isinstance(input[1], OpenAIResponseMessage) + assert input[1].content[0].text == "fake_response" + # Check for new input + assert isinstance(input[2], OpenAIResponseMessage) + assert input[2].content == "fake_input" + + +async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store): + """Test prepending a web search previous response to a new response.""" + input_item_message = OpenAIResponseMessage( + id="123", +
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")], + role="user", + ) + output_web_search = OpenAIResponseOutputMessageWebSearchToolCall( + id="ws_123", + status="completed", + ) + output_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")], + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[output_web_search, output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[OpenAIUserMessageParam(content="test input")], + ) + mock_responses_store.get_response_object.return_value = response + + input_messages = [OpenAIResponseMessage(content="fake_input", role="user")] + input = await openai_responses_impl._prepend_previous_response(input_messages, response) + + assert len(input) == 4 + # Check for previous input + assert isinstance(input[0], OpenAIResponseMessage) + assert input[0].content[0].text == "fake_previous_input" + # Check for previous output web search tool call + assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall) + # Check for previous output web search response + assert isinstance(input[2], OpenAIResponseMessage) + assert input[2].content[0].text == "fake_web_search_response" + # Check for new input + assert isinstance(input[3], OpenAIResponseMessage) + assert input[3].content == "fake_input" + + +async def test_prepend_previous_response_mcp_tool_call(openai_responses_impl, mock_responses_store): + """Test prepending a previous response which included an MCP tool call to a new response.""" + input_item_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")], + role="user", + ) + output_tool_call = OpenAIResponseOutputMessageMCPCall( + id="ws_123", + name="fake-tool", + arguments="fake-arguments", + server_label="fake-label", + ) + output_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseOutputMessageContentOutputText(text="fake_tool_call_response")], + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[output_tool_call, output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[OpenAIUserMessageParam(content="test input")], + ) + mock_responses_store.get_response_object.return_value = response + + input_messages = [OpenAIResponseMessage(content="fake_input", role="user")] + input = await openai_responses_impl._prepend_previous_response(input_messages, response) + + assert len(input) == 4 + # Check for previous input + assert isinstance(input[0], OpenAIResponseMessage) + assert input[0].content[0].text == "fake_previous_input" + # Check for previous output MCP tool call + assert isinstance(input[1], OpenAIResponseOutputMessageMCPCall) + # Check for previous output MCP tool call response + assert isinstance(input[2], OpenAIResponseMessage) + assert input[2].content[0].text == "fake_tool_call_response" + # Check for new input + assert isinstance(input[3], OpenAIResponseMessage) + assert input[3].content == "fake_input" + + +async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api): + # Setup + input_text = "What is the
capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + instructions=instructions, + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + assert len(sent_messages) == 2 + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + assert sent_messages[1].role == "user" + assert sent_messages[1].content == input_text + + +async def test_create_openai_response_with_instructions_and_multiple_messages( + openai_responses_impl, mock_inference_api +): + # Setup + input_messages = [ + OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None), + OpenAIResponseMessage( + role="assistant", + content="Galway, Longford, Sligo", + name=None, + ), + OpenAIResponseMessage(role="user", content="Which is the largest?", name=None), + ] + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input=input_messages, + model=model, + instructions=instructions, + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + assert len(sent_messages) == 4 # 1 system + 3 input messages + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + + # Check the rest of the messages were converted correctly + assert sent_messages[1].role == "user" + assert sent_messages[1].content == "Name some towns in Ireland" + assert sent_messages[2].role == "assistant" + assert sent_messages[2].content == "Galway, Longford, Sligo" + assert sent_messages[3].role == "user" + assert sent_messages[3].content == "Which is the largest?" + + +async def test_create_openai_response_with_instructions_and_previous_response( + openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test prepending both instructions and previous response.""" + + input_item_message = OpenAIResponseMessage( + id="123", + content="Name some towns in Ireland", + role="user", + ) + response_output_message = OpenAIResponseMessage( + id="123", + content="Galway, Longford, Sligo", + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[response_output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[ + OpenAIUserMessageParam(content="Name some towns in Ireland"), + OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"), + ], + ) + mock_responses_store.get_response_object.return_value = response + + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." 
+ + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123" + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + assert len(sent_messages) == 4, sent_messages + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + + # Check the rest of the messages were converted correctly + assert sent_messages[1].role == "user" + assert sent_messages[1].content == "Name some towns in Ireland" + assert sent_messages[2].role == "assistant" + assert sent_messages[2].content == "Galway, Longford, Sligo" + assert sent_messages[3].role == "user" + assert sent_messages[3].content == "Which is the largest?" + + +async def test_create_openai_response_with_previous_response_instructions( + openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test prepending instructions and previous response with instructions.""" + + input_item_message = OpenAIResponseMessage( + id="123", + content="Name some towns in Ireland", + role="user", + ) + response_output_message = OpenAIResponseMessage( + id="123", + content="Galway, Longford, Sligo", + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[response_output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[ + OpenAIUserMessageParam(content="Name some towns in Ireland"), + OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"), + ], + instructions="You are a helpful assistant.", + ) + mock_responses_store.get_response_object.return_value = response + + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123" + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + # and that the previous response instructions were not carried over + assert len(sent_messages) == 4, sent_messages + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + + # Check the rest of the messages were converted correctly + assert sent_messages[1].role == "user" + assert sent_messages[1].content == "Name some towns in Ireland" + assert sent_messages[2].role == "assistant" + assert sent_messages[2].content == "Galway, Longford, Sligo" + assert sent_messages[3].role == "user" + assert sent_messages[3].content == "Which is the largest?" 
+ + +async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store): + """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters.""" + # Setup + response_id = "resp_123" + after = "msg_after" + before = "msg_before" + include = ["metadata"] + limit = 5 + order = Order.asc + + input_message = OpenAIResponseMessage( + id="msg_123", + content="Test message", + role="user", + ) + + expected_result = ListOpenAIResponseInputItem(data=[input_message]) + mock_responses_store.list_response_input_items.return_value = expected_result + + # Execute with all parameters to test delegation + result = await openai_responses_impl.list_openai_response_input_items( + response_id, after=after, before=before, include=include, limit=limit, order=order + ) + + # Verify all parameters are passed through correctly to the store + mock_responses_store.list_response_input_items.assert_called_once_with( + response_id, after, before, include, limit, order + ) + + # Verify the result is returned as-is from the store + assert result.object == "list" + assert len(result.data) == 1 + assert result.data[0].id == "msg_123" + + +async def test_responses_store_list_input_items_logic(): + """Test ResponsesStore list_response_input_items logic - mocks get_response_object to test actual ordering/limiting.""" + + # Create mock store and response store + mock_sql_store = AsyncMock() + backend_name = "sql_responses_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path="mock_db_path")}) + responses_store = ResponsesStore( + ResponsesStoreReference(backend=backend_name, table_name="responses"), policy=default_policy() + ) + responses_store.sql_store = mock_sql_store + + # Setup test data - multiple input items + input_items = [ + OpenAIResponseMessage(id="msg_1", content="First message", role="user"), + OpenAIResponseMessage(id="msg_2", content="Second message", role="user"), + OpenAIResponseMessage(id="msg_3", content="Third message", role="user"), + OpenAIResponseMessage(id="msg_4", content="Fourth message", role="user"), + ] + + response_with_input = _OpenAIResponseObjectWithInputAndMessages( + id="resp_123", + model="test_model", + created_at=1234567890, + object="response", + status="completed", + output=[], + text=OpenAIResponseText(format=(OpenAIResponseTextFormat(type="text"))), + input=input_items, + messages=[OpenAIUserMessageParam(content="First message")], + ) + + # Mock the get_response_object method to return our test data + mock_sql_store.fetch_one.return_value = {"response_object": response_with_input.model_dump()} + + # Test 1: Default behavior (no limit, desc order) + result = await responses_store.list_response_input_items("resp_123") + assert result.object == "list" + assert len(result.data) == 4 + # Should be reversed for desc order + assert result.data[0].id == "msg_4" + assert result.data[1].id == "msg_3" + assert result.data[2].id == "msg_2" + assert result.data[3].id == "msg_1" + + # Test 2: With limit=2, desc order + result = await responses_store.list_response_input_items("resp_123", limit=2, order=Order.desc) + assert result.object == "list" + assert len(result.data) == 2 + # Should be first 2 items in desc order + assert result.data[0].id == "msg_4" + assert result.data[1].id == "msg_3" + + # Test 3: With limit=2, asc order + result = await responses_store.list_response_input_items("resp_123", limit=2, order=Order.asc) + assert result.object == "list" + assert len(result.data) == 2 + # Should 
be first 2 items in original order (asc) + assert result.data[0].id == "msg_1" + assert result.data[1].id == "msg_2" + + # Test 4: Asc order without limit + result = await responses_store.list_response_input_items("resp_123", order=Order.asc) + assert result.object == "list" + assert len(result.data) == 4 + # Should be in original order (asc) + assert result.data[0].id == "msg_1" + assert result.data[1].id == "msg_2" + assert result.data[2].id == "msg_3" + assert result.data[3].id == "msg_4" + + # Test 5: Large limit (larger than available items) + result = await responses_store.list_response_input_items("resp_123", limit=10, order=Order.desc) + assert result.object == "list" + assert len(result.data) == 4 # Should return all available items + assert result.data[0].id == "msg_4" + + # Test 6: Zero limit edge case + result = await responses_store.list_response_input_items("resp_123", limit=0, order=Order.asc) + assert result.object == "list" + assert len(result.data) == 0 # Should return no items + + +async def test_store_response_uses_rehydrated_input_with_previous_response( + openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test that _store_response uses the full re-hydrated input (including previous responses) + rather than just the original input when previous_response_id is provided.""" + + # Setup - Create a previous response that should be included in the stored input + previous_response = _OpenAIResponseObjectWithInputAndMessages( + id="resp-previous-123", + object="response", + created_at=1234567890, + model="meta-llama/Llama-3.1-8B-Instruct", + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[ + OpenAIResponseMessage( + id="msg-prev-user", role="user", content=[OpenAIResponseInputMessageContentText(text="What is 2+2?")] + ) + ], + output=[ + OpenAIResponseMessage( + id="msg-prev-assistant", + role="assistant", + content=[OpenAIResponseOutputMessageContentOutputText(text="2+2 equals 4.")], + ) + ], + messages=[ + OpenAIUserMessageParam(content="What is 2+2?"), + OpenAIAssistantMessageParam(content="2+2 equals 4."), + ], + ) + + mock_responses_store.get_response_object.return_value = previous_response + + current_input = "Now what is 3+3?" + model = "meta-llama/Llama-3.1-8B-Instruct" + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute - Create response with previous_response_id + result = await openai_responses_impl.create_openai_response( + input=current_input, + model=model, + previous_response_id="resp-previous-123", + store=True, + ) + + store_call_args = mock_responses_store.store_response_object.call_args + stored_input = store_call_args.kwargs["input"] + + # Verify that the stored input contains the full re-hydrated conversation: + # 1. Previous user message + # 2. Previous assistant response + # 3. Current user message + assert len(stored_input) == 3 + + assert stored_input[0].role == "user" + assert stored_input[0].content[0].text == "What is 2+2?" + + assert stored_input[1].role == "assistant" + assert stored_input[1].content[0].text == "2+2 equals 4." + + assert stored_input[2].role == "user" + assert stored_input[2].content == "Now what is 3+3?" 
+ + # Verify the response itself is correct + assert result.model == model + assert result.status == "completed" + + +@patch("llama_stack.providers.utils.tools.mcp.list_mcp_tools") +async def test_reuse_mcp_tool_list( + mock_list_mcp_tools, openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test that mcp_list_tools can be reused where appropriate.""" + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + mock_list_mcp_tools.return_value = ListToolDefsResponse( + data=[ToolDef(name="test_tool", description="a test tool", input_schema={}, output_schema={})] + ) + + res1 = await openai_responses_impl.create_openai_response( + input="What is 2+2?", + model="meta-llama/Llama-3.1-8B-Instruct", + store=True, + tools=[ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"), + ], + ) + args = mock_responses_store.store_response_object.call_args + data = args.kwargs["response_object"].model_dump() + data["input"] = [input_item.model_dump() for input_item in args.kwargs["input"]] + data["messages"] = [msg.model_dump() for msg in args.kwargs["messages"]] + stored = _OpenAIResponseObjectWithInputAndMessages(**data) + mock_responses_store.get_response_object.return_value = stored + + res2 = await openai_responses_impl.create_openai_response( + previous_response_id=res1.id, + input="Now what is 3+3?", + model="meta-llama/Llama-3.1-8B-Instruct", + store=True, + tools=[ + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"), + ], + ) + assert len(mock_inference_api.openai_chat_completion.call_args_list) == 2 + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + second_params = second_call.args[0] + tools_seen = second_params.tools + assert len(tools_seen) == 1 + assert tools_seen[0]["function"]["name"] == "test_tool" + assert tools_seen[0]["function"]["description"] == "a test tool" + + assert mock_list_mcp_tools.call_count == 1 + listings = [obj for obj in res2.output if obj.type == "mcp_list_tools"] + assert len(listings) == 1 + assert listings[0].server_label == "alabel" + assert len(listings[0].tools) == 1 + assert listings[0].tools[0].name == "test_tool" + + +@pytest.mark.parametrize( + "text_format, response_format", + [ + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None), + ( + OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")), + OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})), + ), + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), + # ensure text param with no format specified defaults to None + (OpenAIResponseText(format=None), None), + # ensure text param of None defaults to None + (None, None), + ], +) +async def test_create_openai_response_with_text_format( + openai_responses_impl, mock_inference_api, text_format, response_format +): + """Test creating Responses with text formats.""" + # Setup + input_text = "How hot it is in San Francisco today?" 
+ model = "meta-llama/Llama-3.1-8B-Instruct" + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + _result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + text=text_format, + ) + + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + first_params = first_call.args[0] + assert first_params.messages[0].content == input_text + assert first_params.response_format == response_format + + +async def test_create_openai_response_with_invalid_text_format(openai_responses_impl, mock_inference_api): + """Test creating an OpenAI response with an invalid text format.""" + # Setup + input_text = "How hot it is in San Francisco today?" + model = "meta-llama/Llama-3.1-8B-Instruct" + + # Execute + with pytest.raises(ValueError): + _result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + text=OpenAIResponseText(format={"type": "invalid"}), + ) + + +async def test_create_openai_response_with_output_types_as_input( + openai_responses_impl, mock_inference_api, mock_responses_store +): + """Test that response outputs can be used as inputs in multi-turn conversations. + + Before adding OpenAIResponseOutput types to OpenAIResponseInput, + creating a _OpenAIResponseObjectWithInputAndMessages with some output types + in the input field would fail with a Pydantic ValidationError. + + This test simulates storing a response where the input contains output message + types (MCP calls, function calls), which happens in multi-turn conversations. + """ + model = "meta-llama/Llama-3.1-8B-Instruct" + + # Mock the inference response + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Create a response with store=True to trigger the storage path + result = await openai_responses_impl.create_openai_response( + input="What's the weather?", + model=model, + stream=True, + temperature=0.1, + store=True, + ) + + # Consume the stream + _ = [chunk async for chunk in result] + + # Verify store was called + assert mock_responses_store.store_response_object.called + + # Get the stored data + store_call_args = mock_responses_store.store_response_object.call_args + stored_response = store_call_args.kwargs["response_object"] + + # Now simulate a multi-turn conversation where outputs become inputs + input_with_output_types = [ + OpenAIResponseMessage(role="user", content="What's the weather?", name=None), + # These output types need to be valid OpenAIResponseInput + OpenAIResponseOutputMessageFunctionToolCall( + call_id="call_123", + name="get_weather", + arguments='{"city": "Tokyo"}', + type="function_call", + ), + OpenAIResponseOutputMessageMCPCall( + id="mcp_456", + type="mcp_call", + server_label="weather_server", + name="get_temperature", + arguments='{"location": "Tokyo"}', + output="25°C", + ), + ] + + # This simulates storing a response in a multi-turn conversation + # where previous outputs are included in the input. 
+ stored_with_outputs = _OpenAIResponseObjectWithInputAndMessages( + id=stored_response.id, + created_at=stored_response.created_at, + model=stored_response.model, + status=stored_response.status, + output=stored_response.output, + input=input_with_output_types, # This will trigger Pydantic validation + messages=None, + ) + + assert stored_with_outputs.input == input_with_output_types + assert len(stored_with_outputs.input) == 3 diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py new file mode 100644 index 000000000..fa1ddae78 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py @@ -0,0 +1,249 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +import pytest + +from llama_stack_api.common.errors import ( + ConversationNotFoundError, + InvalidConversationIdError, +) +from llama_stack_api.conversations import ( + ConversationItemList, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseOutputMessageContentOutputText, +) + +# Import existing fixtures from the main responses test file +pytest_plugins = ["tests.unit.providers.agents.meta_reference.test_openai_responses"] + +from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( + OpenAIResponsesImpl, +) + + +@pytest.fixture +def responses_impl_with_conversations( + mock_inference_api, + mock_tool_groups_api, + mock_tool_runtime_api, + mock_responses_store, + mock_vector_io_api, + mock_conversations_api, + mock_safety_api, +): + """Create OpenAIResponsesImpl instance with conversations API.""" + return OpenAIResponsesImpl( + inference_api=mock_inference_api, + tool_groups_api=mock_tool_groups_api, + tool_runtime_api=mock_tool_runtime_api, + responses_store=mock_responses_store, + vector_io_api=mock_vector_io_api, + conversations_api=mock_conversations_api, + safety_api=mock_safety_api, + ) + + +class TestConversationValidation: + """Test conversation ID validation logic.""" + + async def test_nonexistent_conversation_raises_error( + self, responses_impl_with_conversations, mock_conversations_api + ): + """Test that ConversationNotFoundError is raised for non-existent conversation.""" + conv_id = "conv_nonexistent" + + # Mock conversation not found + mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent") + + with pytest.raises(ConversationNotFoundError): + await responses_impl_with_conversations.create_openai_response( + input="Hello", model="test-model", conversation=conv_id, stream=False + ) + + +class TestMessageSyncing: + """Test message syncing to conversations.""" + + async def test_sync_response_to_conversation_simple( + self, responses_impl_with_conversations, mock_conversations_api + ): + """Test syncing simple response to conversation.""" + conv_id = "conv_test123" + input_text = "What are the 5 Ds of dodgeball?" 
+ + # Output items (what the model generated) + output_items = [ + OpenAIResponseMessage( + id="msg_response", + content=[ + OpenAIResponseOutputMessageContentOutputText( + text="The 5 Ds are: Dodge, Duck, Dip, Dive, and Dodge.", type="output_text", annotations=[] + ) + ], + role="assistant", + status="completed", + type="message", + ) + ] + + await responses_impl_with_conversations._sync_response_to_conversation(conv_id, input_text, output_items) + + # should call add_items with user input and assistant response + mock_conversations_api.add_items.assert_called_once() + call_args = mock_conversations_api.add_items.call_args + + assert call_args[0][0] == conv_id # conversation_id + items = call_args[0][1] # conversation_items + + assert len(items) == 2 + # User message + assert items[0].type == "message" + assert items[0].role == "user" + assert items[0].content[0].type == "input_text" + assert items[0].content[0].text == input_text + + # Assistant message + assert items[1].type == "message" + assert items[1].role == "assistant" + + async def test_sync_response_to_conversation_api_error( + self, responses_impl_with_conversations, mock_conversations_api + ): + mock_conversations_api.add_items.side_effect = Exception("API Error") + output_items = [] + + # matching the behavior of OpenAI here + with pytest.raises(Exception, match="API Error"): + await responses_impl_with_conversations._sync_response_to_conversation( + "conv_test123", "Hello", output_items + ) + + async def test_sync_with_list_input(self, responses_impl_with_conversations, mock_conversations_api): + """Test syncing with list of input messages.""" + conv_id = "conv_test123" + input_messages = [ + OpenAIResponseMessage(role="user", content=[{"type": "input_text", "text": "First message"}]), + ] + output_items = [ + OpenAIResponseMessage( + id="msg_response", + content=[OpenAIResponseOutputMessageContentOutputText(text="Response", type="output_text")], + role="assistant", + status="completed", + type="message", + ) + ] + + await responses_impl_with_conversations._sync_response_to_conversation(conv_id, input_messages, output_items) + + mock_conversations_api.add_items.assert_called_once() + call_args = mock_conversations_api.add_items.call_args + + items = call_args[0][1] + # Should have input message + output message + assert len(items) == 2 + + +class TestIntegrationWorkflow: + """Integration tests for the full conversation workflow.""" + + async def test_create_response_with_valid_conversation( + self, responses_impl_with_conversations, mock_conversations_api + ): + """Test creating a response with a valid conversation parameter.""" + mock_conversations_api.list_items.return_value = ConversationItemList( + data=[], first_id=None, has_more=False, last_id=None, object="list" + ) + + async def mock_streaming_response(*args, **kwargs): + message_item = OpenAIResponseMessage( + id="msg_response", + content=[ + OpenAIResponseOutputMessageContentOutputText( + text="Test response", type="output_text", annotations=[] + ) + ], + role="assistant", + status="completed", + type="message", + ) + + # Emit output_item.done event first (needed for conversation sync) + yield OpenAIResponseObjectStreamResponseOutputItemDone( + response_id="resp_test123", + item=message_item, + output_index=0, + sequence_number=1, + type="response.output_item.done", + ) + + # Then emit response.completed + mock_response = OpenAIResponseObject( + id="resp_test123", + created_at=1234567890, + model="test-model", + object="response", + output=[message_item], + 
status="completed", + ) + + yield OpenAIResponseObjectStreamResponseCompleted(response=mock_response, type="response.completed") + + responses_impl_with_conversations._create_streaming_response = mock_streaming_response + + input_text = "Hello, how are you?" + conversation_id = "conv_test123" + + response = await responses_impl_with_conversations.create_openai_response( + input=input_text, model="test-model", conversation=conversation_id, stream=False + ) + + assert response is not None + assert response.id == "resp_test123" + + # Note: conversation sync happens inside _create_streaming_response, + # which we're mocking here, so we can't test it in this unit test. + # The sync logic is tested separately in TestMessageSyncing. + + async def test_create_response_with_invalid_conversation_id(self, responses_impl_with_conversations): + """Test creating a response with an invalid conversation ID.""" + with pytest.raises(InvalidConversationIdError) as exc_info: + await responses_impl_with_conversations.create_openai_response( + input="Hello", model="test-model", conversation="invalid_id", stream=False + ) + + assert "Expected an ID that begins with 'conv_'" in str(exc_info.value) + + async def test_create_response_with_nonexistent_conversation( + self, responses_impl_with_conversations, mock_conversations_api + ): + """Test creating a response with a non-existent conversation.""" + mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent") + + with pytest.raises(ConversationNotFoundError) as exc_info: + await responses_impl_with_conversations.create_openai_response( + input="Hello", model="test-model", conversation="conv_nonexistent", stream=False + ) + + assert "not found" in str(exc_info.value) + + async def test_conversation_and_previous_response_id( + self, responses_impl_with_conversations, mock_conversations_api, mock_responses_store + ): + with pytest.raises(ValueError) as exc_info: + await responses_impl_with_conversations.create_openai_response( + input="test", model="test", conversation="conv_123", previous_response_id="resp_123" + ) + + assert "Mutually exclusive parameters" in str(exc_info.value) + assert "previous_response_id" in str(exc_info.value) + assert "conversation" in str(exc_info.value) diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py new file mode 100644 index 000000000..b7a437686 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py @@ -0,0 +1,367 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +import pytest + +from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( + _extract_citations_from_text, + convert_chat_choice_to_response_message, + convert_response_content_to_chat_content, + convert_response_input_to_chat_messages, + convert_response_text_to_chat_response_format, + get_message_type_by_role, + is_function_tool_call, +) +from llama_stack_api.inference import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseAnnotationFileCitation, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMessage, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseText, + OpenAIResponseTextFormat, +) + + +class TestConvertChatChoiceToResponseMessage: + async def test_convert_string_content(self): + choice = OpenAIChoice( + message=OpenAIAssistantMessageParam(content="Test message"), + finish_reason="stop", + index=0, + ) + + result = await convert_chat_choice_to_response_message(choice) + + assert result.role == "assistant" + assert result.status == "completed" + assert len(result.content) == 1 + assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText) + assert result.content[0].text == "Test message" + + async def test_convert_text_param_content(self): + choice = OpenAIChoice( + message=OpenAIAssistantMessageParam( + content=[OpenAIChatCompletionContentPartTextParam(text="Test text param")] + ), + finish_reason="stop", + index=0, + ) + + with pytest.raises(ValueError) as exc_info: + await convert_chat_choice_to_response_message(choice) + + assert "does not yet support output content type" in str(exc_info.value) + + +class TestConvertResponseContentToChatContent: + async def test_convert_string_content(self): + result = await convert_response_content_to_chat_content("Simple string") + assert result == "Simple string" + + async def test_convert_text_content_parts(self): + content = [ + OpenAIResponseInputMessageContentText(text="First part"), + OpenAIResponseOutputMessageContentOutputText(text="Second part"), + ] + + result = await convert_response_content_to_chat_content(content) + + assert len(result) == 2 + assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam) + assert result[0].text == "First part" + assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam) + assert result[1].text == "Second part" + + async def test_convert_image_content(self): + content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")] + + result = await convert_response_content_to_chat_content(content) + + assert len(result) == 1 + assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam) + assert result[0].image_url.url == "https://example.com/image.jpg" + assert result[0].image_url.detail == "high" + + +class TestConvertResponseInputToChatMessages: + async def test_convert_string_input(self): + result = await 
convert_response_input_to_chat_messages("User message") + + assert len(result) == 1 + assert isinstance(result[0], OpenAIUserMessageParam) + assert result[0].content == "User message" + + async def test_convert_function_tool_call_output(self): + input_items = [ + OpenAIResponseOutputMessageFunctionToolCall( + call_id="call_123", + name="test_function", + arguments='{"param": "value"}', + ), + OpenAIResponseInputFunctionToolCallOutput( + output="Tool output", + call_id="call_123", + ), + ] + + result = await convert_response_input_to_chat_messages(input_items) + + assert len(result) == 2 + assert isinstance(result[0], OpenAIAssistantMessageParam) + assert result[0].tool_calls[0].id == "call_123" + assert result[0].tool_calls[0].function.name == "test_function" + assert result[0].tool_calls[0].function.arguments == '{"param": "value"}' + assert isinstance(result[1], OpenAIToolMessageParam) + assert result[1].content == "Tool output" + assert result[1].tool_call_id == "call_123" + + async def test_convert_function_tool_call(self): + input_items = [ + OpenAIResponseOutputMessageFunctionToolCall( + call_id="call_456", + name="test_function", + arguments='{"param": "value"}', + ) + ] + + result = await convert_response_input_to_chat_messages(input_items) + + assert len(result) == 1 + assert isinstance(result[0], OpenAIAssistantMessageParam) + assert len(result[0].tool_calls) == 1 + assert result[0].tool_calls[0].id == "call_456" + assert result[0].tool_calls[0].function.name == "test_function" + assert result[0].tool_calls[0].function.arguments == '{"param": "value"}' + + async def test_convert_function_call_ordering(self): + input_items = [ + OpenAIResponseOutputMessageFunctionToolCall( + call_id="call_123", + name="test_function_a", + arguments='{"param": "value"}', + ), + OpenAIResponseOutputMessageFunctionToolCall( + call_id="call_456", + name="test_function_b", + arguments='{"param": "value"}', + ), + OpenAIResponseInputFunctionToolCallOutput( + output="AAA", + call_id="call_123", + ), + OpenAIResponseInputFunctionToolCallOutput( + output="BBB", + call_id="call_456", + ), + ] + + result = await convert_response_input_to_chat_messages(input_items) + assert len(result) == 4 + assert isinstance(result[0], OpenAIAssistantMessageParam) + assert len(result[0].tool_calls) == 1 + assert result[0].tool_calls[0].id == "call_123" + assert result[0].tool_calls[0].function.name == "test_function_a" + assert result[0].tool_calls[0].function.arguments == '{"param": "value"}' + assert isinstance(result[1], OpenAIToolMessageParam) + assert result[1].content == "AAA" + assert result[1].tool_call_id == "call_123" + assert isinstance(result[2], OpenAIAssistantMessageParam) + assert len(result[2].tool_calls) == 1 + assert result[2].tool_calls[0].id == "call_456" + assert result[2].tool_calls[0].function.name == "test_function_b" + assert result[2].tool_calls[0].function.arguments == '{"param": "value"}' + assert isinstance(result[3], OpenAIToolMessageParam) + assert result[3].content == "BBB" + assert result[3].tool_call_id == "call_456" + + async def test_convert_response_message(self): + input_items = [ + OpenAIResponseMessage( + role="user", + content=[OpenAIResponseInputMessageContentText(text="User text")], + ) + ] + + result = await convert_response_input_to_chat_messages(input_items) + + assert len(result) == 1 + assert isinstance(result[0], OpenAIUserMessageParam) + # Content should be converted to chat content format + assert len(result[0].content) == 1 + assert result[0].content[0].text == "User text" 
+ + +class TestConvertResponseTextToChatResponseFormat: + async def test_convert_text_format(self): + text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) + result = await convert_response_text_to_chat_response_format(text) + + assert isinstance(result, OpenAIResponseFormatText) + assert result.type == "text" + + async def test_convert_json_object_format(self): + text = OpenAIResponseText(format={"type": "json_object"}) + result = await convert_response_text_to_chat_response_format(text) + + assert isinstance(result, OpenAIResponseFormatJSONObject) + + async def test_convert_json_schema_format(self): + schema_def = {"type": "object", "properties": {"test": {"type": "string"}}} + text = OpenAIResponseText( + format={ + "type": "json_schema", + "name": "test_schema", + "schema": schema_def, + } + ) + result = await convert_response_text_to_chat_response_format(text) + + assert isinstance(result, OpenAIResponseFormatJSONSchema) + assert result.json_schema["name"] == "test_schema" + assert result.json_schema["schema"] == schema_def + + async def test_default_text_format(self): + text = OpenAIResponseText() + result = await convert_response_text_to_chat_response_format(text) + + assert isinstance(result, OpenAIResponseFormatText) + assert result.type == "text" + + +class TestGetMessageTypeByRole: + async def test_user_role(self): + result = await get_message_type_by_role("user") + assert result == OpenAIUserMessageParam + + async def test_system_role(self): + result = await get_message_type_by_role("system") + assert result == OpenAISystemMessageParam + + async def test_assistant_role(self): + result = await get_message_type_by_role("assistant") + assert result == OpenAIAssistantMessageParam + + async def test_developer_role(self): + result = await get_message_type_by_role("developer") + assert result == OpenAIDeveloperMessageParam + + async def test_unknown_role(self): + result = await get_message_type_by_role("unknown") + assert result is None + + +class TestIsFunctionToolCall: + def test_is_function_tool_call_true(self): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id="call_123", + function=OpenAIChatCompletionToolCallFunction( + name="test_function", + arguments="{}", + ), + ) + tools = [ + OpenAIResponseInputToolFunction( + type="function", name="test_function", parameters={"type": "object", "properties": {}} + ), + OpenAIResponseInputToolWebSearch(type="web_search"), + ] + + result = is_function_tool_call(tool_call, tools) + assert result is True + + def test_is_function_tool_call_false_different_name(self): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id="call_123", + function=OpenAIChatCompletionToolCallFunction( + name="other_function", + arguments="{}", + ), + ) + tools = [ + OpenAIResponseInputToolFunction( + type="function", name="test_function", parameters={"type": "object", "properties": {}} + ), + ] + + result = is_function_tool_call(tool_call, tools) + assert result is False + + def test_is_function_tool_call_false_no_function(self): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id="call_123", + function=None, + ) + tools = [ + OpenAIResponseInputToolFunction( + type="function", name="test_function", parameters={"type": "object", "properties": {}} + ), + ] + + result = is_function_tool_call(tool_call, tools) + assert result is False + + def test_is_function_tool_call_false_wrong_type(self): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id="call_123", + function=OpenAIChatCompletionToolCallFunction( + 
name="web_search", + arguments="{}", + ), + ) + tools = [ + OpenAIResponseInputToolWebSearch(type="web_search"), + ] + + result = is_function_tool_call(tool_call, tools) + assert result is False + + +class TestExtractCitationsFromText: + def test_extract_citations_and_annotations(self): + text = "Start [not-a-file]. New source <|file-abc123|>. " + text += "Other source <|file-def456|>? Repeat source <|file-abc123|>! No citation." + file_mapping = {"file-abc123": "doc1.pdf", "file-def456": "doc2.txt"} + + annotations, cleaned_text = _extract_citations_from_text(text, file_mapping) + + expected_annotations = [ + OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=30), + OpenAIResponseAnnotationFileCitation(file_id="file-def456", filename="doc2.txt", index=44), + OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=59), + ] + expected_clean_text = "Start [not-a-file]. New source. Other source? Repeat source! No citation." + + assert cleaned_text == expected_clean_text + assert annotations == expected_annotations + # OpenAI cites at the end of the sentence + assert cleaned_text[expected_annotations[0].index] == "." + assert cleaned_text[expected_annotations[1].index] == "?" + assert cleaned_text[expected_annotations[2].index] == "!" diff --git a/tests/unit/providers/agents/meta_reference/test_response_tool_context.py b/tests/unit/providers/agents/meta_reference/test_response_tool_context.py new file mode 100644 index 000000000..4054debd5 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_response_tool_context.py @@ -0,0 +1,183 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack.providers.inline.agents.meta_reference.responses.types import ToolContext +from llama_stack_api.openai_responses import ( + MCPListToolsTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseObject, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseToolMCP, +) + + +class TestToolContext: + def test_no_tools(self): + tools = [] + context = ToolContext(tools) + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="mymodel", output=[], status="") + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 0 + assert len(context.previous_tools) == 0 + assert len(context.previous_tool_listings) == 0 + + def test_no_previous_tools(self): + tools = [ + OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]), + OpenAIResponseInputToolMCP(server_label="label", server_url="url"), + ] + context = ToolContext(tools) + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="mymodel", output=[], status="") + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 2 + assert len(context.previous_tools) == 0 + assert len(context.previous_tool_listings) == 0 + + def test_reusable_server(self): + tools = [ + OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]), + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"), + ] + context = ToolContext(tools) + output = [ + OpenAIResponseOutputMessageMCPListTools( + id="test", server_label="alabel", tools=[MCPListToolsTool(name="test_tool", input_schema={})] + ) + ] + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="") + previous_response.tools = [ + OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]), + OpenAIResponseToolMCP(server_label="alabel"), + ] + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 1 + assert context.tools_to_process[0].type == "file_search" + assert len(context.previous_tools) == 1 + assert context.previous_tools["test_tool"].server_label == "alabel" + assert context.previous_tools["test_tool"].server_url == "aurl" + assert len(context.previous_tool_listings) == 1 + assert len(context.previous_tool_listings[0].tools) == 1 + assert context.previous_tool_listings[0].server_label == "alabel" + + def test_multiple_reusable_servers(self): + tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"), + OpenAIResponseInputToolWebSearch(), + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"), + ] + context = ToolContext(tools) + output = [ + OpenAIResponseOutputMessageMCPListTools( + id="test1", server_label="alabel", tools=[MCPListToolsTool(name="test_tool", input_schema={})] + ), + OpenAIResponseOutputMessageMCPListTools( + id="test2", + server_label="anotherlabel", + tools=[MCPListToolsTool(name="some_other_tool", input_schema={})], + ), + ] + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="") + previous_response.tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"), + OpenAIResponseInputToolWebSearch(type="web_search"), + 
OpenAIResponseToolMCP(server_label="alabel", server_url="aurl"), + ] + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 2 + assert context.tools_to_process[0].type == "function" + assert context.tools_to_process[1].type == "web_search" + assert len(context.previous_tools) == 2 + assert context.previous_tools["test_tool"].server_label == "alabel" + assert context.previous_tools["test_tool"].server_url == "aurl" + assert context.previous_tools["some_other_tool"].server_label == "anotherlabel" + assert context.previous_tools["some_other_tool"].server_url == "anotherurl" + assert len(context.previous_tool_listings) == 2 + assert len(context.previous_tool_listings[0].tools) == 1 + assert context.previous_tool_listings[0].server_label == "alabel" + assert len(context.previous_tool_listings[1].tools) == 1 + assert context.previous_tool_listings[1].server_label == "anotherlabel" + + def test_multiple_servers_only_one_reusable(self): + tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"), + OpenAIResponseInputToolWebSearch(type="web_search"), + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"), + ] + context = ToolContext(tools) + output = [ + OpenAIResponseOutputMessageMCPListTools( + id="test2", + server_label="anotherlabel", + tools=[MCPListToolsTool(name="some_other_tool", input_schema={})], + ) + ] + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="") + previous_response.tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"), + OpenAIResponseInputToolWebSearch(type="web_search"), + ] + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 3 + assert context.tools_to_process[0].type == "function" + assert context.tools_to_process[1].type == "web_search" + assert context.tools_to_process[2].type == "mcp" + assert len(context.previous_tools) == 1 + assert context.previous_tools["some_other_tool"].server_label == "anotherlabel" + assert context.previous_tools["some_other_tool"].server_url == "anotherurl" + assert len(context.previous_tool_listings) == 1 + assert len(context.previous_tool_listings[0].tools) == 1 + assert context.previous_tool_listings[0].server_label == "anotherlabel" + + def test_mismatched_allowed_tools(self): + tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"), + OpenAIResponseInputToolWebSearch(type="web_search"), + OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl", allowed_tools=["test_tool_2"]), + ] + context = ToolContext(tools) + output = [ + OpenAIResponseOutputMessageMCPListTools( + id="test1", server_label="alabel", tools=[MCPListToolsTool(name="test_tool_1", input_schema={})] + ), + OpenAIResponseOutputMessageMCPListTools( + id="test2", + server_label="anotherlabel", + tools=[MCPListToolsTool(name="some_other_tool", input_schema={})], + ), + ] + previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="") + previous_response.tools = [ + OpenAIResponseInputToolFunction(name="fake", parameters=None), + OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"), + 
OpenAIResponseInputToolWebSearch(type="web_search"), + OpenAIResponseToolMCP(server_label="alabel", server_url="aurl"), + ] + context.recover_tools_from_previous_response(previous_response) + + assert len(context.tools_to_process) == 3 + assert context.tools_to_process[0].type == "function" + assert context.tools_to_process[1].type == "web_search" + assert context.tools_to_process[2].type == "mcp" + assert len(context.previous_tools) == 1 + assert context.previous_tools["some_other_tool"].server_label == "anotherlabel" + assert context.previous_tools["some_other_tool"].server_url == "anotherurl" + assert len(context.previous_tool_listings) == 1 + assert len(context.previous_tool_listings[0].tools) == 1 + assert context.previous_tool_listings[0].server_label == "anotherlabel" diff --git a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py new file mode 100644 index 000000000..d4d1b872a --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py @@ -0,0 +1,155 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import AsyncMock + +import pytest + +from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( + OpenAIResponsesImpl, +) +from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( + extract_guardrail_ids, + run_guardrails, +) +from llama_stack_api.agents import ResponseGuardrailSpec +from llama_stack_api.safety import ModerationObject, ModerationObjectResults + + +@pytest.fixture +def mock_apis(): + """Create mock APIs for testing.""" + return { + "inference_api": AsyncMock(), + "tool_groups_api": AsyncMock(), + "tool_runtime_api": AsyncMock(), + "responses_store": AsyncMock(), + "vector_io_api": AsyncMock(), + "conversations_api": AsyncMock(), + "safety_api": AsyncMock(), + } + + +@pytest.fixture +def responses_impl(mock_apis): + """Create OpenAIResponsesImpl instance with mocked dependencies.""" + return OpenAIResponsesImpl(**mock_apis) + + +def test_extract_guardrail_ids_from_strings(responses_impl): + """Test extraction from simple string guardrail IDs.""" + guardrails = ["llama-guard", "content-filter", "nsfw-detector"] + result = extract_guardrail_ids(guardrails) + assert result == ["llama-guard", "content-filter", "nsfw-detector"] + + +def test_extract_guardrail_ids_from_objects(responses_impl): + """Test extraction from ResponseGuardrailSpec objects.""" + guardrails = [ + ResponseGuardrailSpec(type="llama-guard"), + ResponseGuardrailSpec(type="content-filter"), + ] + result = extract_guardrail_ids(guardrails) + assert result == ["llama-guard", "content-filter"] + + +def test_extract_guardrail_ids_mixed_formats(responses_impl): + """Test extraction from mixed string and object formats.""" + guardrails = [ + "llama-guard", + ResponseGuardrailSpec(type="content-filter"), + "nsfw-detector", + ] + result = extract_guardrail_ids(guardrails) + assert result == ["llama-guard", "content-filter", "nsfw-detector"] + + +def test_extract_guardrail_ids_none_input(responses_impl): + """Test extraction with None input.""" + result = extract_guardrail_ids(None) + assert result == [] + + +def test_extract_guardrail_ids_empty_list(responses_impl): + """Test extraction with empty list.""" + result = extract_guardrail_ids([]) + assert 
result == [] + + +def test_extract_guardrail_ids_unknown_format(responses_impl): + """Test extraction with unknown guardrail format raises ValueError.""" + # Create an object that's neither string nor ResponseGuardrailSpec + unknown_object = {"invalid": "format"} # Plain dict, not ResponseGuardrailSpec + guardrails = ["valid-guardrail", unknown_object, "another-guardrail"] + with pytest.raises(ValueError, match="Unknown guardrail format.*expected str or ResponseGuardrailSpec"): + extract_guardrail_ids(guardrails) + + +@pytest.fixture +def mock_safety_api(): + """Create mock safety API for guardrails testing.""" + safety_api = AsyncMock() + # Mock the routing table and shields list for guardrails lookup + safety_api.routing_table = AsyncMock() + shield = AsyncMock() + shield.identifier = "llama-guard" + shield.provider_resource_id = "llama-guard-model" + safety_api.routing_table.list_shields.return_value = AsyncMock(data=[shield]) + return safety_api + + +async def test_run_guardrails_no_violation(mock_safety_api): + """Test guardrails validation with no violations.""" + text = "Hello world" + guardrail_ids = ["llama-guard"] + + # Mock moderation to return non-flagged content + unflagged_result = ModerationObjectResults(flagged=False, categories={"violence": False}) + mock_moderation_object = ModerationObject(id="test-mod-id", model="llama-guard-model", results=[unflagged_result]) + mock_safety_api.run_moderation.return_value = mock_moderation_object + + result = await run_guardrails(mock_safety_api, text, guardrail_ids) + + assert result is None + # Verify run_moderation was called with the correct model + mock_safety_api.run_moderation.assert_called_once() + call_args = mock_safety_api.run_moderation.call_args + assert call_args[1]["model"] == "llama-guard-model" + + +async def test_run_guardrails_with_violation(mock_safety_api): + """Test guardrails validation with safety violation.""" + text = "Harmful content" + guardrail_ids = ["llama-guard"] + + # Mock moderation to return flagged content + flagged_result = ModerationObjectResults( + flagged=True, + categories={"violence": True}, + user_message="Content flagged by moderation", + metadata={"violation_type": ["S1"]}, + ) + mock_moderation_object = ModerationObject(id="test-mod-id", model="llama-guard-model", results=[flagged_result]) + mock_safety_api.run_moderation.return_value = mock_moderation_object + + result = await run_guardrails(mock_safety_api, text, guardrail_ids) + + assert result == "Content flagged by moderation (flagged for: violence) (violation type: S1)" + + +async def test_run_guardrails_empty_inputs(mock_safety_api): + """Test guardrails validation with empty inputs.""" + # Test empty guardrail_ids + result = await run_guardrails(mock_safety_api, "test", []) + assert result is None + + # Test empty text + result = await run_guardrails(mock_safety_api, "", ["llama-guard"]) + assert result is None + + # Test both empty + result = await run_guardrails(mock_safety_api, "", []) + assert result is None From 97f535c4f141a248b66dc034e1684b1d24b8de74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Sat, 15 Nov 2025 00:53:53 +0100 Subject: [PATCH 33/62] feat(openapi): switch to fastapi-based generator (#3944) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This replaces the legacy "pyopenapi + strong_typing" pipeline with a FastAPI-backed generator that has an explicit schema registry inside `llama_stack_api`. The key changes: 1. 
**New generator architecture.** FastAPI now builds the OpenAPI schema directly from the real routes, while helper modules (`schema_collection`, `endpoints`, `schema_transforms`, etc.) post-process the result. The old pyopenapi stack and its strong_typing helpers are removed entirely, so we no longer rely on fragile AST analysis or top-level import side effects. 2. **Schema registry in `llama_stack_api`.** `schema_utils.py` keeps a `SchemaInfo` record for every `@json_schema_type`, `register_schema`, and dynamically created request model. The OpenAPI generator and other tooling query this registry instead of scanning the package tree, producing deterministic names (e.g., `{MethodName}Request`), capturing all optional/nullable fields, and making schema discovery testable. A new unit test covers the registry behavior. 3. **Regenerated specs + CI alignment.** All docs/Stainless specs are regenerated from the new pipeline, so optional/nullable fields now match reality (expect the API Conformance workflow to report breaking changes—this PR establishes the new baseline). The workflow itself is back to the stock oasdiff invocation so future regressions surface normally. *Conformance will be RED on this PR; we choose to accept the deviations.* ## Test Plan - `uv run pytest tests/unit/server/test_schema_registry.py` - `uv run python -m scripts.openapi_generator.main docs/static` --------- Signed-off-by: Sébastien Han Co-authored-by: Ashwin Bharambe --- .pre-commit-config.yaml | 7 +- CONTRIBUTING.md | 2 +- client-sdks/stainless/README.md | 2 +- client-sdks/stainless/config.yml | 24 +- client-sdks/stainless/openapi.yml | 15875 +++++++++------- docs/openapi_generator/README.md | 1 - docs/openapi_generator/generate.py | 134 - docs/openapi_generator/pyopenapi/README.md | 1 - docs/openapi_generator/pyopenapi/generator.py | 1175 -- .../openapi_generator/pyopenapi/operations.py | 459 - docs/openapi_generator/pyopenapi/options.py | 78 - .../pyopenapi/specification.py | 269 - .../openapi_generator/pyopenapi/template.html | 41 - docs/openapi_generator/pyopenapi/utility.py | 287 - .../run_openapi_generator.sh | 34 - docs/static/deprecated-llama-stack-spec.yaml | 10580 +++++++++- .../static/experimental-llama-stack-spec.yaml | 10305 ++++++++-- docs/static/llama-stack-spec.yaml | 14390 ++++++++------ docs/static/stainless-llama-stack-spec.yaml | 15875 +++++++++------- pyproject.toml | 24 +- scripts/openapi_generator/__init__.py | 16 + .../openapi_generator/__main__.py | 9 + scripts/openapi_generator/_legacy_order.py | 502 + scripts/openapi_generator/app.py | 91 + scripts/openapi_generator/endpoints.py | 657 + scripts/openapi_generator/main.py | 241 + .../openapi_generator/schema_collection.py | 131 + scripts/openapi_generator/schema_filtering.py | 297 + .../openapi_generator/schema_transforms.py | 963 + scripts/openapi_generator/state.py | 41 + scripts/run_openapi_generator.sh | 19 + src/llama_stack/core/library_client.py | 13 +- src/llama_stack/core/utils/type_inspection.py | 45 + src/llama_stack_api/__init__.py | 35 +- src/llama_stack_api/benchmarks.py | 1 + src/llama_stack_api/datasets.py | 1 + src/llama_stack_api/inspect.py | 1 + src/llama_stack_api/models.py | 1 + src/llama_stack_api/openai_responses.py | 1 + src/llama_stack_api/post_training.py | 2 + src/llama_stack_api/prompts.py | 1 + src/llama_stack_api/providers.py | 1 + src/llama_stack_api/schema_utils.py | 98 +- src/llama_stack_api/scoring_functions.py | 1 + src/llama_stack_api/shields.py | 1 + src/llama_stack_api/strong_typing/__init__.py | 19 - 
.../strong_typing/auxiliary.py | 229 - src/llama_stack_api/strong_typing/classdef.py | 440 - src/llama_stack_api/strong_typing/core.py | 46 - .../strong_typing/deserializer.py | 872 - .../strong_typing/docstring.py | 410 - .../strong_typing/exception.py | 23 - .../strong_typing/inspection.py | 1104 -- src/llama_stack_api/strong_typing/mapping.py | 39 - src/llama_stack_api/strong_typing/name.py | 188 - src/llama_stack_api/strong_typing/schema.py | 791 - .../strong_typing/serialization.py | 97 - .../strong_typing/serializer.py | 494 - src/llama_stack_api/strong_typing/slots.py | 27 - .../strong_typing/topological.py | 90 - src/llama_stack_api/tools.py | 2 + src/llama_stack_api/vector_io.py | 7 +- tests/unit/server/test_schema_registry.py | 48 + uv.lock | 152 +- 64 files changed, 47592 insertions(+), 30218 deletions(-) delete mode 100644 docs/openapi_generator/README.md delete mode 100644 docs/openapi_generator/generate.py delete mode 100644 docs/openapi_generator/pyopenapi/README.md delete mode 100644 docs/openapi_generator/pyopenapi/generator.py delete mode 100644 docs/openapi_generator/pyopenapi/operations.py delete mode 100644 docs/openapi_generator/pyopenapi/options.py delete mode 100644 docs/openapi_generator/pyopenapi/specification.py delete mode 100644 docs/openapi_generator/pyopenapi/template.html delete mode 100644 docs/openapi_generator/pyopenapi/utility.py delete mode 100755 docs/openapi_generator/run_openapi_generator.sh create mode 100644 scripts/openapi_generator/__init__.py rename docs/openapi_generator/pyopenapi/__init__.py => scripts/openapi_generator/__main__.py (58%) create mode 100644 scripts/openapi_generator/_legacy_order.py create mode 100644 scripts/openapi_generator/app.py create mode 100644 scripts/openapi_generator/endpoints.py create mode 100755 scripts/openapi_generator/main.py create mode 100644 scripts/openapi_generator/schema_collection.py create mode 100644 scripts/openapi_generator/schema_filtering.py create mode 100644 scripts/openapi_generator/schema_transforms.py create mode 100644 scripts/openapi_generator/state.py create mode 100755 scripts/run_openapi_generator.sh create mode 100644 src/llama_stack/core/utils/type_inspection.py delete mode 100644 src/llama_stack_api/strong_typing/__init__.py delete mode 100644 src/llama_stack_api/strong_typing/auxiliary.py delete mode 100644 src/llama_stack_api/strong_typing/classdef.py delete mode 100644 src/llama_stack_api/strong_typing/core.py delete mode 100644 src/llama_stack_api/strong_typing/deserializer.py delete mode 100644 src/llama_stack_api/strong_typing/docstring.py delete mode 100644 src/llama_stack_api/strong_typing/exception.py delete mode 100644 src/llama_stack_api/strong_typing/inspection.py delete mode 100644 src/llama_stack_api/strong_typing/mapping.py delete mode 100644 src/llama_stack_api/strong_typing/name.py delete mode 100644 src/llama_stack_api/strong_typing/schema.py delete mode 100644 src/llama_stack_api/strong_typing/serialization.py delete mode 100644 src/llama_stack_api/strong_typing/serializer.py delete mode 100644 src/llama_stack_api/strong_typing/slots.py delete mode 100644 src/llama_stack_api/strong_typing/topological.py create mode 100644 tests/unit/server/test_schema_registry.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c60440173..c31a39406 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,6 @@ repos: hooks: - id: ruff args: [ --fix ] - exclude: ^(src/llama_stack_api/strong_typing/.*)$ - id: ruff-format - repo: 
https://github.com/adamchainz/blacken-docs @@ -106,16 +105,16 @@ repos: language: python pass_filenames: false require_serial: true - files: ^src/llama_stack/providers/.*$ + files: ^src/llama_stack/providers/.*$|^scripts/run_openapi_generator.sh$ - id: openapi-codegen name: API Spec Codegen additional_dependencies: - uv==0.7.8 - entry: sh -c './scripts/uv-run-with-index.sh run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null' + entry: sh -c './scripts/uv-run-with-index.sh run scripts/run_openapi_generator.sh' language: python pass_filenames: false require_serial: true - files: ^src/llama_stack/apis/|^docs/openapi_generator/ + files: ^src/llama_stack_api/.*$ - id: check-workflows-use-hashes name: Check GitHub Actions use SHA-pinned actions entry: ./scripts/check-workflows-use-hashes.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d84332829..ba6c2eaf2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -231,7 +231,7 @@ npm run serve If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -uv run ./docs/openapi_generator/run_openapi_generator.sh +uv run ./scripts/run_openapi_generator.sh ``` The generated API schema will be available in `docs/static/`. Make sure to review the changes before committing. diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md index 5551e90d5..73e7082d4 100644 --- a/client-sdks/stainless/README.md +++ b/client-sdks/stainless/README.md @@ -5,4 +5,4 @@ These are the source-of-truth configuration files used to generate the Stainless A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files. -These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `run_openapi_generator.sh` script. +These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `scripts/run_openapi_generator.sh` script. 
diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml index c61b53654..9b26114fe 100644 --- a/client-sdks/stainless/config.yml +++ b/client-sdks/stainless/config.yml @@ -115,9 +115,6 @@ resources: sampling_params: SamplingParams scoring_result: ScoringResult system_message: SystemMessage - query_result: RAGQueryResult - document: RAGDocument - query_config: RAGQueryConfig toolgroups: models: tool_group: ToolGroup @@ -143,11 +140,6 @@ resources: endpoint: get /v1/tool-runtime/list-tools paginated: false invoke_tool: post /v1/tool-runtime/invoke - subresources: - rag_tool: - methods: - insert: post /v1/tool-runtime/rag-tool/insert - query: post /v1/tool-runtime/rag-tool/query responses: models: @@ -173,6 +165,7 @@ resources: list: type: http endpoint: get /v1/responses/{response_id}/input_items + paginated: false prompts: models: @@ -220,6 +213,9 @@ resources: create: type: http endpoint: post /v1/conversations/{conversation_id}/items + delete: + type: http + endpoint: delete /v1/conversations/{conversation_id}/items/{item_id} inspect: models: @@ -252,6 +248,7 @@ resources: list: type: http endpoint: get /v1/chat/completions + paginated: false retrieve: type: http endpoint: get /v1/chat/completions/{completion_id} @@ -375,6 +372,7 @@ resources: endpoint: get /v1/scoring-functions paginated: false register: post /v1/scoring-functions + unregister: delete /v1/scoring-functions/{scoring_fn_id} models: scoring_fn: ScoringFn scoring_fn_params: ScoringFnParams @@ -392,6 +390,13 @@ resources: list_files_response: ListOpenAIFileResponse delete_file_response: OpenAIFileDeleteResponse + batches: + methods: + create: post /v1/batches + list: get /v1/batches + retrieve: get /v1/batches/{batch_id} + cancel: post /v1/batches/{batch_id}/cancel + alpha: subresources: inference: @@ -423,6 +428,7 @@ resources: endpoint: get /v1alpha/eval/benchmarks paginated: false register: post /v1alpha/eval/benchmarks + unregister: delete /v1alpha/eval/benchmarks/{benchmark_id} models: benchmark: Benchmark list_benchmarks_response: ListBenchmarksResponse @@ -519,7 +525,7 @@ readme: params: &ref_0 {} headline: type: request - endpoint: post /v1/models + endpoint: get /v1/models params: *ref_0 pagination: type: request diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index d0813de4d..ff86e30e1 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -1,19 +1,18 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Stable & Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Stable & Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **🔗 COMBINED**: This specification includes both stable production-ready APIs - and experimental pre-release APIs. Use stable APIs for production deployments - and experimental APIs for testing new features. + **🔗 COMBINED**: This specification includes both stable production-ready APIs + and experimental pre-release APIs. Use stable APIs for production deployments + and experimental APIs for testing new features. 
+ version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1/batches: get: @@ -26,34 +25,37 @@ paths: $ref: '#/components/schemas/ListBatchesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: List all batches for the current user. + - Batches + summary: List Batches description: List all batches for the current user. + operationId: list_batches_v1_batches_get parameters: - - name: after - in: query - description: >- - A cursor for pagination; returns batches after this batch ID. - required: false - schema: - type: string - - name: limit - in: query - description: >- - Number of batches to return (default 20, max 100). - required: true - schema: - type: integer - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit post: responses: '200': @@ -64,28 +66,27 @@ paths: $ref: '#/components/schemas/Batch' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: >- - Create a new batch for processing multiple API requests. - description: >- - Create a new batch for processing multiple API requests. - parameters: [] + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests. + operationId: create_batch_v1_batches_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateBatchRequest' - required: true - deprecated: false /v1/batches/{batch_id}: get: responses: @@ -96,29 +97,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: >- - Retrieve information about a specific batch. - description: >- - Retrieve information about a specific batch. + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the batch to retrieve. 
- required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/batches/{batch_id}/cancel: post: responses: @@ -129,27 +130,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: Cancel a batch that is in progress. + - Batches + summary: Cancel Batch description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the batch to cancel. - required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -161,48 +164,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". 
- required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -210,35 +221,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. - description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. + operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -249,30 +261,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. 
- required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -283,31 +297,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -318,30 +332,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. - description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. - parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -352,30 +367,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. 
- required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -385,36 +402,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -424,30 +443,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. Delete a conversation with the given ID. + operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -459,73 +480,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. 
+ operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: false - schema: - type: array + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: - type: string - enum: - - web_search_call.action.sources - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - title: ConversationItemInclude - description: >- - Specify additional output data to include in the model response. - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). - required: false - schema: - type: string - enum: - - asc - - desc - deprecated: false + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -536,35 +552,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. 
- required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -573,38 +591,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -614,365 +634,352 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. - description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. 
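For the `/v1/embeddings` operation introduced here, a minimal sketch using the OpenAI-compatible request and response shapes; the embedding model id is illustrative:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/embeddings",
    json={"model": "nomic-embed-text-v1.5", "input": "hello world"},  # illustrative model id
)
vector = resp.json()["data"][0]["embedding"]  # OpenAIEmbeddingsResponse, OpenAI-style shape
print(len(vector))
```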
content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. 
- required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. - parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. + operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. 
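A sketch of the file upload and listing endpoints above. The spec defines upload as a multipart form with `file`, `purpose`, and optional `expires_after`; the `purpose` value and the `data` array in the list response are assumptions based on the OpenAI Files API:

```python
import requests

BASE = "http://localhost:8321/v1"

# Upload is a multipart form request: file plus purpose (expires_after optional).
with open("notes.txt", "rb") as fh:
    uploaded = requests.post(
        f"{BASE}/files",
        files={"file": ("notes.txt", fh)},
        data={"purpose": "assistants"},  # purpose value assumed from OpenAIFilePurpose
    ).json()

# List with the pagination/filter query parameters defined above.
listing = requests.get(f"{BASE}/files", params={"limit": 10, "order": "desc"})
print([f["id"] for f in listing.json()["data"]])  # data array assumed, OpenAI-style
```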
- required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. 
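A one-liner sketch of the `/v1/health` probe above, assuming the same local server; the exact HealthInfo payload shape is left to the schema:

```python
import requests

resp = requests.get("http://localhost:8321/v1/health")
resp.raise_for_status()
print(resp.json())  # HealthInfo payload, e.g. a status field
```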
- parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. + operationId: list_routes_v1_inspect_routes_get parameters: - - name: api_filter - in: query - description: >- - Optional filter to control which routes are returned. Can be an API level - ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, - or 'deprecated' to show deprecated routes across all levels. If not specified, - returns all non-deprecated routes. - required: false - schema: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated type: string - enum: - - v1 - - v1alpha - - v1beta - - deprecated - deprecated: false + - type: 'null' + title: Api Filter /v1/models: get: responses: @@ -983,21 +990,22 @@ paths: schema: $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List models using the OpenAI API. + - Models + summary: Openai List Models description: List models using the OpenAI API. - parameters: [] - deprecated: false + operationId: openai_list_models_v1_models_get post: responses: '200': @@ -1007,23 +1015,25 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Register model. - description: >- + - Models + summary: Register Model + description: |- Register model. Register a model. 
- parameters: [] + operationId: register_model_v1_models_post requestBody: content: application/json: @@ -1041,59 +1051,63 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Get model. - description: >- + - Models + summary: Get Model + description: |- Get model. Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: model_id - in: path - description: The identifier of the model to get. - required: true - schema: - type: string - deprecated: false + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Models - summary: Unregister model. - description: >- + - Models + summary: Unregister Model + description: |- Unregister model. Unregister a model. + operationId: unregister_model_v1_models__model_id__delete parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. - required: true - schema: - type: string + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' deprecated: true /v1/moderations: post: @@ -1105,56 +1119,57 @@ paths: schema: $ref: '#/components/schemas/ModerationObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Create moderation. - description: >- + - Safety + summary: Run Moderation + description: |- Create moderation. Classifies if text and/or image inputs are potentially harmful. - parameters: [] + operationId: run_moderation_v1_moderations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunModerationRequest' required: true - deprecated: false /v1/prompts: get: responses: '200': - description: >- - A ListPromptsResponse containing all prompts. + description: A ListPromptsResponse containing all prompts. 
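A sketch of the `/v1/moderations` classification endpoint defined above; the moderation model id is illustrative, and the `results` field is assumed from the OpenAI moderation response shape:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/moderations",
    json={
        "input": "Text to classify for potential harm.",
        "model": "llama-guard",  # illustrative moderation model id
    },
)
print(resp.json()["results"])  # ModerationObject, OpenAI-style results array
```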
content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List all prompts. + - Prompts + summary: List Prompts description: List all prompts. - parameters: [] - deprecated: false + operationId: list_prompts_v1_prompts_get post: responses: '200': @@ -1164,30 +1179,31 @@ paths: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Create prompt. - description: >- + - Prompts + summary: Create Prompt + description: |- Create prompt. Create a new prompt. - parameters: [] + operationId: create_prompt_v1_prompts_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreatePromptRequest' required: true - deprecated: false /v1/prompts/{prompt_id}: get: responses: @@ -1199,246 +1215,254 @@ paths: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Get prompt. - description: >- + - Prompts + summary: Get Prompt + description: |- Get prompt. Get a prompt by its identifier and optional version. + operationId: get_prompt_v1_prompts__prompt_id__get parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to get. - required: true - schema: - type: string - - name: version - in: query - description: >- - The version of the prompt to get (defaults to latest). - required: false - schema: - type: integer - deprecated: false + - name: version + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Version + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' post: responses: '200': - description: >- - The updated Prompt resource with incremented version. + description: The updated Prompt resource with incremented version. 
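A sketch of the prompt versioning flow above: updating increments the version, and an older version can be fetched via the `version` query parameter shown in the spec. The create/update body fields and the `prompt_id` response field are assumptions for illustration:

```python
import requests

BASE = "http://localhost:8321/v1"

# Field names in the create/update bodies are assumptions for illustration.
created = requests.post(f"{BASE}/prompts", json={"prompt": "Summarize: {{ text }}"}).json()
prompt_id = created["prompt_id"]  # assumed response field

# Updating increments the version; fetch an older one via the query parameter.
requests.post(f"{BASE}/prompts/{prompt_id}",
              json={"prompt": "Summarize briefly: {{ text }}", "version": 1})
v1 = requests.get(f"{BASE}/prompts/{prompt_id}", params={"version": 1}).json()
```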
content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Update prompt. - description: >- + - Prompts + summary: Update Prompt + description: |- Update prompt. Update an existing prompt (increments version). + operationId: update_prompt_v1_prompts__prompt_id__post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to update. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/UpdatePromptRequest' - required: true - deprecated: false delete: responses: - '200': - description: OK '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response + '204': + description: Successful Response tags: - - Prompts - summary: Delete prompt. - description: >- + - Prompts + summary: Delete Prompt + description: |- Delete prompt. Delete a prompt. + operationId: delete_prompt_v1_prompts__prompt_id__delete parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to delete. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/prompts/{prompt_id}/set-default-version: post: responses: '200': - description: >- - The prompt with the specified version now set as default. + description: The prompt with the specified version now set as default. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Set prompt version. - description: >- + - Prompts + summary: Set Default Version + description: |- Set prompt version. Set which version of a prompt should be the default in get_prompt (latest). + operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt. 
- required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/SetDefaultVersionRequest' required: true - deprecated: false /v1/prompts/{prompt_id}/versions: get: responses: '200': - description: >- - A ListPromptsResponse containing all versions of the prompt. + description: A ListPromptsResponse containing all versions of the prompt. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List prompt versions. - description: >- + - Prompts + summary: List Prompt Versions + description: |- List prompt versions. List all versions of a specific prompt. + operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get parameters: - - name: prompt_id - in: path - description: >- - The identifier of the prompt to list versions for. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/providers: get: responses: '200': - description: >- - A ListProvidersResponse containing information about all providers. + description: A ListProvidersResponse containing information about all providers. content: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: List providers. - description: >- + - Providers + summary: List Providers + description: |- List providers. List all available providers. - parameters: [] - deprecated: false + operationId: list_providers_v1_providers_get /v1/providers/{provider_id}: get: responses: '200': - description: >- - A ProviderInfo object containing the provider's details. + description: A ProviderInfo object containing the provider's details. content: application/json: schema: $ref: '#/components/schemas/ProviderInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: Get provider. - description: >- + - Providers + summary: Inspect Provider + description: |- Get provider. 
Get detailed information about a specific provider. + operationId: inspect_provider_v1_providers__provider_id__get parameters: - - name: provider_id - in: path - description: The ID of the provider to inspect. - required: true - schema: - type: string - deprecated: false + - name: provider_id + in: path + required: true + schema: + type: string + description: 'Path parameter: provider_id' /v1/responses: get: responses: @@ -1450,45 +1474,56 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List all responses. + - Agents + summary: List Openai Responses description: List all responses. + operationId: list_openai_responses_v1_responses_get parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 50 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -1502,38 +1537,51 @@ paths: $ref: '#/components/schemas/OpenAIResponseObjectStream' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: Create a model response. + - Agents + summary: Create Openai Response description: Create a model response. - parameters: [] + operationId: create_openai_response_v1_responses_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateOpenaiResponseRequest' - required: true - deprecated: false - x-llama-stack-extra-body-params: - - name: guardrails - schema: - type: array - items: - oneOf: + x-llama-stack-extra-body-params: + guardrails: + $defs: + ResponseGuardrailSpec: + description: |- + Specification for a guardrail to apply during response generation. + + :param type: The type/identifier of the guardrail. 
+ properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + anyOf: + - items: + anyOf: - type: string - $ref: '#/components/schemas/ResponseGuardrailSpec' - description: >- - List of guardrails to apply during response generation. Guardrails provide - safety and content moderation. - required: false + type: array + - type: 'null' + description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation. /v1/responses/{response_id}: get: responses: @@ -1544,28 +1592,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Get a model response. + - Agents + summary: Get Openai Response description: Get a model response. + operationId: get_openai_response_v1_responses__response_id__get parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' delete: responses: '200': @@ -1575,27 +1624,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIDeleteResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Delete a response. + - Agents + summary: Delete Openai Response description: Delete a response. + operationId: delete_openai_response_v1_responses__response_id__delete parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' /v1/responses/{response_id}/input_items: get: responses: @@ -1607,65 +1658,72 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseInputItem' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List input items. + - Agents + summary: List Openai Response Input Items description: List input items. 
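A sketch of creating a response with the `guardrails` extra-body parameter documented above: per the spec it accepts a list of plain guardrail identifiers and/or `ResponseGuardrailSpec` objects (`{"type": ...}`). The model and guardrail ids are illustrative:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/responses",
    json={
        "model": "meta-llama/Llama-3.2-3B-Instruct",  # illustrative model id
        "input": "Summarize the safety guidance above.",
        # Extra-body parameter from the spec: strings and/or {"type": ...} objects.
        "guardrails": ["llama-guard", {"type": "llama-guard"}],
    },
)
print(resp.json()["id"])  # OpenAIResponseObject
```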
+ operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. - required: false - schema: - type: array + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - type: 'null' + title: Include /v1/safety/run-shield: post: responses: @@ -1676,30 +1734,31 @@ paths: schema: $ref: '#/components/schemas/RunShieldResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Run shield. - description: >- + - Safety + summary: Run Shield + description: |- Run shield. Run a shield. - parameters: [] + operationId: run_shield_v1_safety_run_shield_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunShieldRequest' required: true - deprecated: false /v1/scoring-functions: get: responses: @@ -1710,45 +1769,48 @@ paths: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: List all scoring functions. + - Scoring Functions + summary: List Scoring Functions description: List all scoring functions. 
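A sketch of the `/v1/safety/run-shield` endpoint above; the `shield_id`/`messages`/`params` body fields follow the RunShieldRequest naming but the shield identifier is illustrative:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/safety/run-shield",
    json={
        "shield_id": "llama-guard",  # illustrative shield identifier
        "messages": [{"role": "user", "content": "How do I hotwire a car?"}],
        "params": {},
    },
)
print(resp.json())  # RunShieldResponse; carries a violation when the shield trips
```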
- parameters: [] - deprecated: false + operationId: list_scoring_functions_v1_scoring_functions_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Register a scoring function. + - Scoring Functions + summary: Register Scoring Function description: Register a scoring function. - parameters: [] + operationId: register_scoring_function_v1_scoring_functions_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' + $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: @@ -1761,86 +1823,90 @@ paths: schema: $ref: '#/components/schemas/ScoringFn' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Get a scoring function by its ID. + - Scoring Functions + summary: Get Scoring Function description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get parameters: - - name: scoring_fn_id - in: path - description: The ID of the scoring function to get. - required: true - schema: - type: string - deprecated: false + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Unregister a scoring function. + - Scoring Functions + summary: Unregister Scoring Function description: Unregister a scoring function. + operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' deprecated: true /v1/scoring/score: post: responses: '200': - description: >- - A ScoreResponse object containing rows and aggregated results. 
+ description: A ScoreResponse object containing rows and aggregated results. content: application/json: schema: $ref: '#/components/schemas/ScoreResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a list of rows. + - Scoring + summary: Score description: Score a list of rows. - parameters: [] + operationId: score_v1_scoring_score_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreRequest' required: true - deprecated: false /v1/scoring/score-batch: post: responses: @@ -1851,27 +1917,28 @@ paths: schema: $ref: '#/components/schemas/ScoreBatchResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a batch of rows. + - Scoring + summary: Score Batch description: Score a batch of rows. - parameters: [] + operationId: score_batch_v1_scoring_score_batch_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreBatchRequest' required: true - deprecated: false /v1/shields: get: responses: @@ -1882,21 +1949,22 @@ paths: schema: $ref: '#/components/schemas/ListShieldsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: List all shields. + - Shields + summary: List Shields description: List all shields. - parameters: [] - deprecated: false + operationId: list_shields_v1_shields_get post: responses: '200': @@ -1906,20 +1974,22 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Register a shield. + - Shields + summary: Register Shield description: Register a shield. 
- parameters: [] + operationId: register_shield_v1_shields_post requestBody: content: application/json: @@ -1937,53 +2007,57 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Get a shield by its identifier. + - Shields + summary: Get Shield description: Get a shield by its identifier. + operationId: get_shield_v1_shields__identifier__get parameters: - - name: identifier - in: path - description: The identifier of the shield to get. - required: true - schema: - type: string - deprecated: false + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Shields - summary: Unregister a shield. + - Shields + summary: Unregister Shield description: Unregister a shield. + operationId: unregister_shield_v1_shields__identifier__delete parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' deprecated: true /v1/tool-runtime/invoke: post: @@ -1995,27 +2069,28 @@ paths: schema: $ref: '#/components/schemas/ToolInvocationResult' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolRuntime - summary: Run a tool with the given arguments. + - Tool Runtime + summary: Invoke Tool description: Run a tool with the given arguments. 
- parameters: [] + operationId: invoke_tool_v1_tool_runtime_invoke_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true - deprecated: false /v1/tool-runtime/list-tools: get: responses: @@ -2027,41 +2102,46 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolRuntime - summary: List all tools in the runtime. + - Tool Runtime + summary: List Runtime Tools description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get parameters: - - name: tool_group_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - - name: mcp_endpoint - in: query - description: >- - The MCP endpoint to use for the tool group. - required: false - schema: - $ref: '#/components/schemas/URL' - - name: authorization - in: query - description: >- - (Optional) OAuth access token for authenticating with the MCP server. - required: false - schema: - type: string - deprecated: false + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint /v1/toolgroups: get: responses: @@ -2072,40 +2152,43 @@ paths: schema: $ref: '#/components/schemas/ListToolGroupsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: List tool groups with optional provider. + - Tool Groups + summary: List Tool Groups description: List tool groups with optional provider. - parameters: [] - deprecated: false + operationId: list_tool_groups_v1_toolgroups_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Register a tool group. + - Tool Groups + summary: Register Tool Group description: Register a tool group. 
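A sketch of the tool-runtime pair above: discover tools in a group via the `tool_group_id` query parameter, then invoke one. The group id, tool name, and InvokeToolRequest field names are assumptions for illustration:

```python
import requests

BASE = "http://localhost:8321/v1"

# Discover tools in a group, then invoke one; ids and field names are illustrative.
tools = requests.get(f"{BASE}/tool-runtime/list-tools",
                     params={"tool_group_id": "builtin::websearch"}).json()

result = requests.post(
    f"{BASE}/tool-runtime/invoke",
    json={"tool_name": "web_search", "kwargs": {"query": "llama stack operator"}},
)
print(result.json())  # ToolInvocationResult
```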
- parameters: [] + operationId: register_tool_group_v1_toolgroups_post requestBody: content: application/json: @@ -2123,52 +2206,57 @@ paths: schema: $ref: '#/components/schemas/ToolGroup' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool group by its ID. + - Tool Groups + summary: Get Tool Group description: Get a tool group by its ID. + operationId: get_tool_group_v1_toolgroups__toolgroup_id__get parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to get. - required: true - schema: - type: string - deprecated: false + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Unregister a tool group. + - Tool Groups + summary: Unregister Toolgroup description: Unregister a tool group. + operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' deprecated: true /v1/tools: get: @@ -2181,27 +2269,30 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolGroups - summary: List tools with optional tool group. + - Tool Groups + summary: List Tools description: List tools with optional tool group. + operationId: list_tools_v1_tools_get parameters: - - name: toolgroup_id - in: query - description: >- - The ID of the tool group to list tools for. 
- required: false - schema: - type: string - deprecated: false + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id /v1/tools/{tool_name}: get: responses: @@ -2212,54 +2303,57 @@ paths: schema: $ref: '#/components/schemas/ToolDef' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool by its name. + - Tool Groups + summary: Get Tool description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get parameters: - - name: tool_name - in: path - description: The name of the tool to get. - required: true - schema: - type: string - deprecated: false + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' /v1/vector-io/insert: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - VectorIO - summary: Insert chunks into a vector database. + - Vector Io + summary: Insert Chunks description: Insert chunks into a vector database. - parameters: [] + operationId: insert_chunks_v1_vector_io_insert_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InsertChunksRequest' required: true - deprecated: false /v1/vector-io/query: post: responses: @@ -2270,815 +2364,829 @@ paths: schema: $ref: '#/components/schemas/QueryChunksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Query chunks from a vector database. + - Vector Io + summary: Query Chunks description: Query chunks from a vector database. - parameters: [] + operationId: query_chunks_v1_vector_io_query_post requestBody: content: application/json: schema: $ref: '#/components/schemas/QueryChunksRequest' required: true - deprecated: false /v1/vector_stores: get: responses: '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. + description: A VectorStoreListResponse containing the list of vector stores. 
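      # A minimal sketch of calling the vector-store list route above,
      # assuming a local server on port 8321; limit and order are query
      # parameters of this route (defaulting to 20 and desc):
      #
      #   curl "http://localhost:8321/v1/vector_stores?limit=20&order=desc"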
content: application/json: schema: $ref: '#/components/schemas/VectorStoreListResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Returns a list of vector stores. + - Vector Io + summary: Openai List Vector Stores description: Returns a list of vector stores. + operationId: openai_list_vector_stores_v1_vector_stores_get parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order post: responses: '200': - description: >- - A VectorStoreObject representing the created vector store. + description: A VectorStoreObject representing the created vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Creates a vector store. - description: >- + - Vector Io + summary: Openai Create Vector Store + description: |- Creates a vector store. Generate an OpenAI-compatible vector store with the given parameters. - parameters: [] + operationId: openai_create_vector_store_v1_vector_stores_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}: get: responses: '200': - description: >- - A VectorStoreObject representing the vector store. + description: A VectorStoreObject representing the vector store. 
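      # Sketch of creating a vector store via the POST route above; the body
      # follows OpenAICreateVectorStoreRequestWithExtraBody, and the "name"
      # field here is an assumed OpenAI-compatible example value:
      #
      #   curl -X POST http://localhost:8321/v1/vector_stores \
      #     -H "Content-Type: application/json" \
      #     -d '{"name": "my-store"}'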
content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store. + - Vector Io + summary: Openai Retrieve Vector Store description: Retrieves a vector store. + operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreObject representing the updated vector store. + description: A VectorStoreObject representing the updated vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store. + - Vector Io + summary: Openai Update Vector Store description: Updates a vector store. + operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. + description: A VectorStoreDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store. + - Vector Io + summary: Openai Delete Vector Store description: Delete a vector store. + operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. 
- required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' /v1/vector_stores/{vector_store_id}/file_batches: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the created file batch. + description: A VectorStoreFileBatchObject representing the created file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Create a vector store file batch. - description: >- + - Vector Io + summary: Openai Create Vector Store File Batch + description: |- Create a vector store file batch. - Generate an OpenAI-compatible vector store file batch for the given vector - store. + Generate an OpenAI-compatible vector store file batch for the given vector store. + operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true - deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the file batch. + description: A VectorStoreFileBatchObject representing the file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieve a vector store file batch. + - Vector Io + summary: Openai Retrieve Vector Store File Batch description: Retrieve a vector store file batch. + operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the file batch to retrieve. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. 
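      # Hypothetical call to the file-batch creation route above; the body
      # shape follows OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
      # and "file_ids" plus the IDs themselves are assumed example values:
      #
      #   curl -X POST http://localhost:8321/v1/vector_stores/vs_123/file_batches \
      #     -H "Content-Type: application/json" \
      #     -d '{"file_ids": ["file-abc123"]}'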
- required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the cancelled file batch. + description: A VectorStoreFileBatchObject representing the cancelled file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Cancels a vector store file batch. + - Vector Io + summary: Openai Cancel Vector Store File Batch description: Cancels a vector store file batch. + operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the file batch to cancel. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: get: responses: '200': - description: >- - A VectorStoreFilesListInBatchResponse containing the list of files in - the batch. + description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Returns a list of vector store files in a batch. - description: >- - Returns a list of vector store files in a batch. + - Vector Io + summary: Openai List Files In Vector Store File Batch + description: Returns a list of vector store files in a batch. + operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get parameters: - - name: batch_id - in: path - description: >- - The ID of the file batch to list files from. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. 
- required: true - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - Filter by file status. One of in_progress, completed, failed, cancelled. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Filter + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/files: get: responses: '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. + description: A VectorStoreListFilesResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListFilesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: List files in a vector store. + - Vector Io + summary: Openai List Files In Vector Store description: List files in a vector store. + operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. 
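      # Example of the batch file-listing route above using the filter and
      # limit query parameters it defines (vs_123 and batch_abc are
      # placeholder IDs; filter accepts in_progress, completed, failed,
      # cancelled):
      #
      #   curl "http://localhost:8321/v1/vector_stores/vs_123/file_batches/batch_abc/files?filter=completed&limit=10"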
- required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. - required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + title: Filter + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + nullable: true + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the attached file. + description: A VectorStoreFileObject representing the attached file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Attach a file to a vector store. + - Vector Io + summary: Openai Attach File To Vector Store description: Attach a file to a vector store. + operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: '200': - description: >- - A VectorStoreFileObject representing the file. + description: A VectorStoreFileObject representing the file. 
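      # Sketch of attaching a file with the POST route above; the body follows
      # OpenaiAttachFileToVectorStoreRequest, and "file_id" is an assumed
      # OpenAI-compatible field name with a placeholder value:
      #
      #   curl -X POST http://localhost:8321/v1/vector_stores/vs_123/files \
      #     -H "Content-Type: application/json" \
      #     -d '{"file_id": "file-abc123"}'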
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - File contents, optionally with embeddings and metadata based on query - parameters. + description: File contents, optionally with embeddings and metadata based on query parameters. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - - name: include_embeddings - in: query - description: >- - Whether to include embedding vectors in the response. - required: false - schema: - $ref: '#/components/schemas/bool' - - name: include_metadata - in: query - description: >- - Whether to include chunk metadata in the response. 
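      # Example of the file-content route above using the include_embeddings
      # and include_metadata query parameters defined for it (both default to
      # false; the IDs are placeholders):
      #
      #   curl "http://localhost:8321/v1/vector_stores/vs_123/files/file-abc123/content?include_embeddings=true&include_metadata=true"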
- required: false - schema: - $ref: '#/components/schemas/bool' - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. + Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. 
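      # Sketch of the search route above; the body follows
      # OpenaiSearchVectorStoreRequest, and "query" is an assumed
      # OpenAI-compatible field name. The version route takes no parameters.
      #
      #   curl -X POST http://localhost:8321/v1/vector_stores/vs_123/search \
      #     -H "Content-Type: application/json" \
      #     -d '{"query": "what is llama stack?"}'
      #   curl http://localhost:8321/v1/version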
- parameters: [] - deprecated: false + operationId: version_v1_version_get /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -3090,55 +3198,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. - description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. 
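      # Example of the offset-based pagination described for iterrows above,
      # using its start_index and limit query parameters (my-dataset is a
      # placeholder dataset ID):
      #
      #   curl "http://localhost:8321/v1beta/datasetio/iterrows/my-dataset?start_index=0&limit=10"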
- required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -3149,21 +3255,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. - parameters: [] - deprecated: false + operationId: list_datasets_v1beta_datasets_get post: responses: '200': @@ -3173,25 +3280,27 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Register a new dataset. + - Datasets + summary: Register Dataset description: Register a new dataset. - parameters: [] + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequest' + $ref: '#/components/schemas/RegisterDatasetRequestLoose' required: true deprecated: true /v1beta/datasets/{dataset_id}: @@ -3204,52 +3313,57 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. 
- required: true - schema: - type: string - deprecated: false + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Datasets - summary: Unregister a dataset by its ID. + - Datasets + summary: Unregister Dataset description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' deprecated: true /v1alpha/eval/benchmarks: get: @@ -3261,40 +3375,43 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. - parameters: [] - deprecated: false + operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Register a benchmark. + - Benchmarks + summary: Register Benchmark description: Register a benchmark. - parameters: [] + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: @@ -3312,131 +3429,136 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. 
+ operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Unregister a benchmark. + - Benchmarks + summary: Unregister Benchmark description: Unregister a benchmark. + operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. 
content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. + - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: - $ref: '#/components/schemas/RunEvalRequest' + $ref: '#/components/schemas/BenchmarkConfig' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -3447,67 +3569,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. 
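      # Example of polling and cancelling an eval job via the GET and DELETE
      # routes above (benchmark and job IDs are placeholders):
      #
      #   curl http://localhost:8321/v1alpha/eval/benchmarks/my-benchmark/jobs/job-123
      #   curl -X DELETE http://localhost:8321/v1alpha/eval/benchmarks/my-benchmark/jobs/job-123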
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -3518,68 +3642,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. 
+ operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -3591,54 +3714,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. - parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -3650,27 +3775,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. 
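      # Example of the post-training job-status route above, which takes the
      # job UUID as a query parameter (the UUID is a placeholder):
      #
      #   curl "http://localhost:8321/v1alpha/post-training/job/status?job_uuid=1234-abcd"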
- required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -3681,21 +3807,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -3706,27 +3833,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. - parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -3737,1473 +3865,1277 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. 
properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - ListBatchesResponse: type: object + ListBatchesResponse: properties: object: type: string const: list + title: Object default: list data: - type: array items: - type: object - properties: - id: - type: string - completion_window: - type: string - created_at: - type: integer - endpoint: - type: string - input_file_id: - type: string - object: - type: string - const: batch - status: - type: string - enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled - cancelled_at: - type: integer - cancelling_at: - type: integer - completed_at: - type: integer - error_file_id: - type: string - errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false - title: Errors - expired_at: - type: integer - expires_at: - type: integer - failed_at: - type: integer - finalizing_at: - type: integer - in_progress_at: - type: integer - metadata: - type: object - additionalProperties: - type: string - model: - type: string - output_file_id: - type: string - request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total - title: BatchRequestCounts - usage: - type: object - properties: - input_tokens: - type: integer - input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens - title: BatchUsage - additionalProperties: false - required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status - title: Batch + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list has_more: type: boolean + title: Has More + description: Whether there are more batches available default: false - additionalProperties: false - required: - - object - - data - - has_more 
- title: ListBatchesResponse - description: >- - Response containing a list of batch objects. - CreateBatchRequest: type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. + CreateBatchRequest: properties: input_file_id: type: string - description: >- - The ID of an uploaded file containing requests for the batch. + title: Input File Id endpoint: type: string - description: >- - The endpoint to be used for all requests in the batch. + title: Endpoint completion_window: type: string const: 24h - description: >- - The time window within which the batch should be processed. + title: Completion Window metadata: - type: object - additionalProperties: - type: string - description: Optional metadata for the batch. + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' idempotency_key: - type: string - description: >- - Optional idempotency key. When provided, enables idempotent behavior. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input_file_id - - endpoint - - completion_window + - input_file_id + - endpoint + - completion_window title: CreateBatchRequest Batch: - type: object properties: id: type: string + title: Id completion_window: type: string + title: Completion Window created_at: type: integer + title: Created At endpoint: type: string + title: Endpoint input_file_id: type: string + title: Input File Id object: type: string const: batch + title: Object status: type: string enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status cancelled_at: - type: integer + anyOf: + - type: integer + - type: 'null' cancelling_at: - type: integer + anyOf: + - type: integer + - type: 'null' completed_at: - type: integer + anyOf: + - type: integer + - type: 'null' error_file_id: - type: string + anyOf: + - type: string + - type: 'null' errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' title: Errors expired_at: - type: integer + anyOf: + - type: integer + - type: 'null' expires_at: - type: integer + anyOf: + - type: integer + - type: 'null' failed_at: - type: integer + anyOf: + - type: integer + - type: 'null' finalizing_at: - type: integer + anyOf: + - type: integer + - type: 'null' in_progress_at: - type: integer + anyOf: + - type: integer + - type: 'null' metadata: - type: object - additionalProperties: - type: string + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' model: - type: string + anyOf: + - type: string + - type: 'null' output_file_id: - type: string + anyOf: + - type: string + - type: 'null' request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' title: BatchRequestCounts usage: - type: object - properties: - input_tokens: - type: integer - 
input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' title: BatchUsage - additionalProperties: false + additionalProperties: true + type: object required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status title: Batch Order: type: string enum: - - asc - - desc + - asc + - desc title: Order description: Sort order for paginated responses. ListOpenAIChatCompletionResponse: - type: object properties: data: - type: array items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more completions available beyond this list + title: Has More first_id: type: string - description: ID of the first completion in this list + title: First Id last_id: type: string - description: ID of the last completion in this list + title: Last Id object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. 
properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. 
+ OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. 
+ OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. - OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... 
(5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. - OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. 
- OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. + OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). 
+ anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. - additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. 
+ OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
- logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -5211,6704 +5143,8240 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' - OpenAIResponseInputMessageContentFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: properties: type: type: string const: input_file + title: Type default: input_file - description: >- - The type of the input item. Always `input_file`. file_data: - type: string - description: >- - The data of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_url: - type: string - description: >- - The URL of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' filename: - type: string - description: >- - The name of the file to be sent to the model. - additionalProperties: false - required: - - type - title: OpenAIResponseInputMessageContentFile - description: >- - File content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. 
+ anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. - OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
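As a quick sanity check on the tightened `required` list, a `function_call` output item conforming to the schema above might look like this; the call ID, function name, and status value are illustrative:

```yaml
# Illustrative payload only — conforms to OpenAIResponseOutputMessageFunctionToolCall above.
type: function_call
call_id: call_abc123               # hypothetical call identifier
name: get_weather                  # hypothetical function name
arguments: '{"city": "Paris"}'     # function arguments as a JSON-encoded string
status: completed                  # optional; value illustrative, may be omitted or null
```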
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
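For reference, an `mcp_list_tools` item under the new schema might look like this sketch; the server label and tool definition are hypothetical, and the tool entry assumes `MCPListToolsTool` keeps the `name` + `input_schema` required fields shown in the removed inline definition:

```yaml
# Illustrative payload only — conforms to OpenAIResponseOutputMessageMCPListTools above,
# with one MCPListToolsTool entry (name and input_schema are its required fields).
type: mcp_list_tools
id: mcp_list_abc123        # hypothetical identifier
server_label: docs-server  # hypothetical MCP server label
tools:
  - name: search_docs      # hypothetical tool name
    input_schema:
      type: object
      properties:
        query:
          type: string
```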
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
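A minimal `ConversationDeletedResource` payload per the schema above (only `id` is required; `object` and `deleted` carry defaults); the ID is illustrative:

```yaml
# Illustrative payload only — conforms to ConversationDeletedResource above.
id: conv_abc123               # hypothetical conversation identifier
object: conversation.deleted  # default value
deleted: true                 # default value
```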
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
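To make the `string | list[string]` input typing concrete, an embeddings request body conforming to `OpenAIEmbeddingsRequestWithExtraBody` above might look like this sketch; the model identifier is hypothetical:

```yaml
# Illustrative request body only — conforms to OpenAIEmbeddingsRequestWithExtraBody above.
model: example-embedding-model   # hypothetical model identifier
input:                           # may also be a single string
  - first sentence to embed
  - second sentence to embed
encoding_format: float           # default; "base64" is the other accepted value
```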
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
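The new numeric bounds on `ExpiresAfter.seconds` are easiest to read with an example; a conforming object might look like this:

```yaml
# Illustrative payload only — conforms to ExpiresAfter above.
anchor: created_at   # the only accepted anchor value
seconds: 86400       # one day; must be between 3600 (1 hour) and 2592000 (30 days)
```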
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - OpenAIModel: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: properties: id: type: string + title: Id object: type: string const: model + title: Object default: model created: type: integer + title: Created owned_by: type: string + title: Owned By custom_metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: OpenAIListModelsResponse Model: - type: object properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding - - rerank + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - (Optional) The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. 
+ title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. 
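For reference, a `Prompt` resource under the regenerated schema might look like the sketch below; the 48-digit hash is fabricated for illustration, and the `{{ ... }}` placeholder syntax is an assumption, not something this spec defines:

```yaml
# Illustrative payload only — conforms to the Prompt schema above.
prompt_id: pmpt_123456789012345678901234567890123456789012345678  # hypothetical 48-digit hash
prompt: 'Summarize {{ document }} in a {{ tone }} tone.'  # placeholder syntax assumed
version: 1           # versions start at 1 (minimum enforced above)
variables:
  - document
  - tone
is_default: true
```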
+ ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. - additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. 
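A `ListProvidersResponse` with a single entry might look like this; the provider values and `health` contents are illustrative, since the new schema only requires free-form objects for `config` and `health`:

```yaml
# Illustrative payload only — conforms to ListProvidersResponse / ProviderInfo above.
data:
  - api: inference
    provider_id: vllm                # hypothetical provider ID
    provider_type: remote::vllm
    config:
      url: http://localhost:8000/v1  # hypothetical endpoint
    health:
      status: OK                     # free-form object under the new schema
```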
+ ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. - OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutput' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseMessage' - OpenAIResponseInputToolFileSearch: + title: Message type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false - required: - - type - - name - title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. - OpenAIResponseInputToolWebSearch: + anyOf: + - type: boolean + - type: 'null' type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
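For reference, a function tool definition conforming to `OpenAIResponseInputToolFunction` above (note that `parameters` is now in the `required` list) might look like this; the function itself is hypothetical:

```yaml
# Illustrative payload only — conforms to OpenAIResponseInputToolFunction above.
type: function
name: get_weather                  # hypothetical function name
description: Look up current weather for a city
parameters:                        # JSON schema for the arguments; now a required field
  type: object
  properties:
    city:
      type: string
  required:
    - city
strict: true                       # optional; enforces strict parameter validation
```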
+ OpenAIResponseInputToolWebSearch: properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 - - type: string - const: web_search_2025_08_26 + title: Type default: web_search - description: Web search tool type variant to use - search_context_size: type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type - title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseObjectWithInput: type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response + anyOf: + - type: integer + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
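
Because the regenerated `OpenAIResponseObjectWithInput` schema discriminates `output` items on their `type` field, a client can dispatch on that key directly. The sketch below uses a hand-built response dict with hypothetical values; the exact shape of message items is defined by `OpenAIResponseMessage`, which is not shown in this hunk.

```python
# Minimal sketch: dispatch on the "type" discriminator of each output item,
# per the mapping in OpenAIResponseObjectWithInput above. The response dict
# and message fields are illustrative, not captured from a live server.
response = {
    "id": "resp_123",
    "created_at": 1731000000,
    "model": "llama-3",  # hypothetical model id
    "status": "completed",
    "output": [{"type": "message", "role": "assistant", "content": "Hi!"}],
    "input": [{"type": "message", "role": "user", "content": "Hello"}],
}

for item in response["output"]:
    kind = item["type"]  # discriminator property named in the schema
    if kind == "message":
        print("assistant message:", item.get("content"))
    elif kind in ("function_call", "mcp_call"):
        print("tool call item:", item)
    else:
        print("other output item:", kind)
```
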
OpenAIResponseOutput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) OpenAIResponsePrompt: - type: object properties: id: type: string - description: Unique identifier of the prompt template + title: Id variables: - type: object - additionalProperties: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - description: >- - Dictionary of variable names to OpenAIResponseInputMessageContent structure - for template substitution. The substitution values can either be strings, - or other Response input types like images or files. 
+ anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' version: - type: string - description: >- - Version number of the prompt to use (defaults to latest if not specified) - additionalProperties: false - required: - - id - title: OpenAIResponsePrompt - description: >- - OpenAI compatible Prompt object that is used in OpenAI responses. - OpenAIResponseText: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. + OpenAIResponseText: properties: format: - type: object - properties: - type: - oneOf: - - type: string - const: text - - type: string - const: json_schema - - type: string - const: json_object - description: >- - Must be "text", "json_schema", or "json_object" to identify the format - type - name: - type: string - description: >- - The name of the response format. Only used for json_schema. - schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. Only used for json_schema. - description: - type: string - description: >- - (Optional) A description of the response format. Only used for json_schema. - strict: - type: boolean - description: >- - (Optional) Whether to strictly enforce the JSON schema. If true, the - response must match the schema exactly. Only used for json_schema. - additionalProperties: false - required: - - type - description: >- - (Optional) Text format configuration specifying output format requirements - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object title: OpenAIResponseText - description: >- - Text response configuration for OpenAI responses. + description: Text response configuration for OpenAI responses. 
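
The inline `format` object is replaced by a reference to `OpenAIResponseTextFormat`, but the fields visible in the removed lines (`type`, `name`, `schema`, `strict`) still describe its shape. A hypothetical structured-output configuration might look like the sketch below; when `format` is omitted, the regenerated spec defaults it to `{"type": "text"}`.

```python
# Illustrative OpenAIResponseText value requesting JSON-schema output.
# Field names follow the (removed) inline format definition above; the
# schema contents are hypothetical.
text_config = {
    "format": {
        "type": "json_schema",  # "text", "json_schema", or "json_object"
        "name": "city_info",    # only used for json_schema
        "schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
        "strict": True,
    }
}
```
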
OpenAIResponseTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) OpenAIResponseToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - title: OpenAIResponseToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response object. - OpenAIResponseUsage: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. 
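
In the reworked `OpenAIResponseToolMCP` schema, `allowed_tools` accepts either a bare list of tool names or an `AllowedToolsFilter` object (whose `tool_names` field appears in the removed inline definition). Both spellings of the same hypothetical restriction are sketched below; `type` defaults to `"mcp"` and only `server_label` is required.

```python
# Two equivalent ways to restrict tools on an OpenAIResponseToolMCP entry.
# The server label and tool names are hypothetical.
mcp_tool_list_form = {
    "type": "mcp",
    "server_label": "docs-server",
    "allowed_tools": ["search", "fetch"],  # plain list[string] form
}

mcp_tool_filter_form = {
    "type": "mcp",
    "server_label": "docs-server",
    "allowed_tools": {"tool_names": ["search", "fetch"]},  # AllowedToolsFilter
}
```
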
+ OpenAIResponseUsage: properties: input_tokens: type: integer - description: Number of tokens in the input + title: Input Tokens output_tokens: type: integer - description: Number of tokens in the output + title: Output Tokens total_tokens: type: integer - description: Total tokens used (input + output) + title: Total Tokens input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - description: Detailed breakdown of input token usage + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - description: Detailed breakdown of output token usage - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object required: - - input_tokens - - output_tokens - - total_tokens + - input_tokens + - output_tokens + - total_tokens title: OpenAIResponseUsage description: Usage information for OpenAI response. ResponseGuardrailSpec: - type: object + description: Specification for a guardrail to apply during response generation. properties: type: + title: Type type: string - description: The type/identifier of the guardrail. - additionalProperties: false required: - - type + - type title: ResponseGuardrailSpec - description: >- - Specification for a guardrail to apply during response generation. + type: object OpenAIResponseInputTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) OpenAIResponseInputToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label server_url: type: string - description: URL endpoint of the MCP server + title: Server Url headers: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) HTTP headers to include when connecting to the server + anyOf: + - additionalProperties: true + type: object + - type: 'null' authorization: - type: string - description: >- - (Optional) OAuth access token for authenticating with the MCP server + anyOf: + - type: string + - type: 'null' require_approval: - oneOf: - - type: string - const: always - - type: string - const: never - - type: object - properties: - always: - type: array - items: - type: string - description: >- - (Optional) List of tool names that always require approval - never: - type: array - items: - type: string - description: >- - (Optional) List of tool names that never require approval - additionalProperties: false - title: ApprovalFilter - description: >- - Filter configuration for MCP tool approval requirements. + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter default: never - description: >- - Approval requirement for tool calls ("always", "never", or filter) allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - - server_url - - require_approval - title: OpenAIResponseInputToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response inputs. - CreateOpenaiResponseRequest: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. + CreateOpenaiResponseRequest: properties: input: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: Input message(s) to create the response. 
+ anyOf: + - type: string + - items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input + type: array + title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] + title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] model: type: string - description: The underlying LLM used for completions. + title: Model prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Prompt object with ID, version, and variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt instructions: - type: string + anyOf: + - type: string + - type: 'null' previous_response_id: - type: string - description: >- - (Optional) if specified, the new response will be a continuation of the - previous response. This can be used to easily fork-off new responses from - existing responses. + anyOf: + - type: string + - type: 'null' conversation: - type: string - description: >- - (Optional) The ID of a conversation to add the response to. Must begin - with 'conv_'. Input and output messages will be automatically added to - the conversation. 
+ anyOf: + - type: string + - type: 'null' store: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: true stream: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: false temperature: - type: number + anyOf: + - type: number + - type: 'null' text: - $ref: '#/components/schemas/OpenAIResponseText' + anyOf: + - $ref: '#/components/schemas/OpenAIResponseText' + title: OpenAIResponseText + - type: 'null' + title: OpenAIResponseText tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputTool' + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' include: - type: array - items: - type: string - description: >- - (Optional) Additional fields to include in the response. + anyOf: + - items: + type: string + type: array + - type: 'null' max_infer_iters: - type: integer + anyOf: + - type: integer + - type: 'null' + default: 10 max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - input - - model + - input + - model title: CreateOpenaiResponseRequest OpenAIResponseObject: - type: object properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) 
+ oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. 
+ anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - title: OpenAIResponseObject - description: >- - Complete OpenAI response object containing generation results and metadata. - OpenAIResponseContentPartOutputText: + anyOf: + - type: integer + - type: 'null' type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. 
properties: type: - type: string const: output_text default: output_text - description: >- - Content part type identifier, always "output_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Text emitted for this content part annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - description: >- - Structured annotations associated with the text + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array logprobs: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) Token log probability details - additionalProperties: false + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true required: - - type - - text - - annotations + - text title: OpenAIResponseContentPartOutputText - description: >- - Text content within a streamed response part. - "OpenAIResponseContentPartReasoningSummary": type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. properties: type: - type: string const: summary_text default: summary_text - description: >- - Content part type identifier, always "summary_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Summary text - additionalProperties: false required: - - type - - text - title: >- - OpenAIResponseContentPartReasoningSummary - description: >- - Reasoning summary part in a streamed response. - OpenAIResponseContentPartReasoningText: + - text + title: OpenAIResponseContentPartReasoningSummary type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. properties: type: - type: string const: reasoning_text default: reasoning_text - description: >- - Content part type identifier, always "reasoning_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Reasoning text supplied by the model - additionalProperties: false required: - - type - - text + - text title: OpenAIResponseContentPartReasoningText - description: >- - Reasoning text emitted as part of a streamed response. 
+ type: object OpenAIResponseObjectStream: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: - propertyName: type mapping: - response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - response.output_item.done: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.completed: 
'#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' - "OpenAIResponseObjectStreamResponseCompleted": - type: object + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: 
OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Completed response object type: - type: string const: response.completed default: response.completed - description: >- - Event type identifier, always "response.completed" - additionalProperties: false + title: Type + type: string required: - - response - - type - title: >- - OpenAIResponseObjectStreamResponseCompleted - description: >- - Streaming event indicating a response has been completed. - "OpenAIResponseObjectStreamResponseContentPartAdded": + - response + title: OpenAIResponseObjectStreamResponseCompleted type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the part within the content array response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this content item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item containing this content part output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response part: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' - - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' - refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' - description: The content part that was added + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.content_part.added default: response.content_part.added - description: >- - Event type identifier, always "response.content_part.added" - additionalProperties: false + title: Type + type: string required: - - content_index - - response_id - - item_id - - output_index - - part - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseContentPartAdded - description: >- - Streaming event for when a new content part is added to a response item. - "OpenAIResponseObjectStreamResponseContentPartDone": + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartAdded type: object + OpenAIResponseObjectStreamResponseContentPartDone: + description: Streaming event for when a content part is completed. 
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.done
           default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
       type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
         type:
-          type: string
           const: response.created
           default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+      - response
+      title: OpenAIResponseObjectStreamResponseCreated
       type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.failed
           default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - sequence_number
-        - type
+      - response
+      - sequence_number
       title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.completed
           default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.in_progress
           default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.searching
           default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: >-
-            Incremental function call arguments being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the function call being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.delta
           default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
       properties:
         arguments:
+          title: Arguments
           type: string
-          description: >-
-            Final complete arguments JSON string for the function call
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed function call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.done
           default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - arguments
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the MCP call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.mcp_call.in_progress
           default: response.mcp_call.in_progress
-          description: >-
-            Event type identifier, always "response.mcp_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpCallInProgress
-      description: >-
-        Streaming event for MCP calls in progress.
-    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.completed
           default: response.mcp_list_tools.completed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsCompleted
-    "OpenAIResponseObjectStreamResponseMcpListToolsFailed":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed:
      properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.failed
           default: response.mcp_list_tools.failed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsFailed
-    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.in_progress
           default: response.mcp_list_tools.in_progress
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsInProgress
-    "OpenAIResponseObjectStreamResponseOutputItemAdded":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
       type: object
+    OpenAIResponseObjectStreamResponseOutputItemAdded:
+      description: Streaming event for when a new output item is added to the response.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The output item that was added (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.added
           default: response.output_item.added
-          description: >-
-            Event type identifier, always "response.output_item.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemAdded
-      description: >-
-        Streaming event for when a new output item is added to the response.
-    "OpenAIResponseObjectStreamResponseOutputItemDone":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputItemDone:
+      description: Streaming event for when an output item is completed.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The completed output item (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.done
           default: response.output_item.done
-          description: >-
-            Event type identifier, always "response.output_item.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemDone
-      description: >-
-        Streaming event for when an output item is completed.
-    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemDone
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded:
+      description: Streaming event for when an annotation is added to output text.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the item to which the annotation is being added
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response's output array
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the content part within the output item
         annotation_index:
+          title: Annotation Index
           type: integer
-          description: >-
-            Index of the annotation within the content part
         annotation:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
           discriminator:
-            propertyName: type
             mapping:
-              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
               container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
               file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
-          description: The annotation object being added
+              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+            title: OpenAIResponseAnnotationFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            title: OpenAIResponseAnnotationCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+            title: OpenAIResponseAnnotationContainerFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+            title: OpenAIResponseAnnotationFilePath
+          title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.annotation.added
           default: response.output_text.annotation.added
-          description: >-
-            Event type identifier, always "response.output_text.annotation.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - content_index
-        - annotation_index
-        - annotation
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
-      description: >-
-        Streaming event for when an annotation is added to output text.
-    "OpenAIResponseObjectStreamResponseOutputTextDelta":
+      - item_id
+      - output_index
+      - content_index
+      - annotation_index
+      - annotation
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDelta:
+      description: Streaming event for incremental text content updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         delta:
+          title: Delta
           type: string
-          description: Incremental text content being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.delta
           default: response.output_text.delta
-          description: >-
-            Event type identifier, always "response.output_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDelta
-      description: >-
-        Streaming event for incremental text content updates.
-    "OpenAIResponseObjectStreamResponseOutputTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDone:
+      description: Streaming event for when text output is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         text:
+          title: Text
           type: string
-          description: >-
-            Final complete text content of the output item
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.done
           default: response.output_text.done
-          description: >-
-            Event type identifier, always "response.output_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDone
-      description: >-
-        Streaming event for when text output is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded:
+      description: Streaming event for when a new reasoning summary part is added.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The summary part that was added
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.added
           default: response.reasoning_summary_part.added
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
-      description: >-
-        Streaming event for when a new reasoning summary part is added.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone:
+      description: Streaming event for when a reasoning summary part is completed.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The completed summary part
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.done
           default: response.reasoning_summary_part.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
-      description: >-
-        Streaming event for when a reasoning summary part is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta:
+      description: Streaming event for incremental reasoning summary text updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: Incremental summary text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.delta
           default: response.reasoning_summary_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
-      description: >-
-        Streaming event for incremental reasoning summary text updates.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone:
+      description: Streaming event for when reasoning summary text is completed.
       properties:
         text:
+          title: Text
           type: string
-          description: Final complete summary text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.done
           default: response.reasoning_summary_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
-      description: >-
-        Streaming event for when reasoning summary text is completed.
-    "OpenAIResponseObjectStreamResponseReasoningTextDelta":
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDelta:
+      description: Streaming event for incremental reasoning text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         delta:
+          title: Delta
           type: string
-          description: Incremental reasoning text being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.delta
           default: response.reasoning_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDelta
-      description: >-
-        Streaming event for incremental reasoning text updates.
-    "OpenAIResponseObjectStreamResponseReasoningTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         text:
+          title: Text
           type: string
-          description: Final complete reasoning text
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.done
           default: response.reasoning_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDone
-      description: >-
-        Streaming event for when reasoning text is completed.
-    "OpenAIResponseObjectStreamResponseRefusalDelta":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         delta:
+          title: Delta
           type: string
-          description: Incremental refusal text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.delta
           default: response.refusal.delta
-          description: >-
-            Event type identifier, always "response.refusal.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDelta
-      description: >-
-        Streaming event for incremental refusal text updates.
-    "OpenAIResponseObjectStreamResponseRefusalDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         refusal:
+          title: Refusal
           type: string
-          description: Final complete refusal text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.done
           default: response.refusal.done
-          description: >-
-            Event type identifier, always "response.refusal.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - refusal
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDone
-      description: >-
-        Streaming event for when refusal text is completed.
-    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+      - content_index
+      - refusal
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
       type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed web search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.web_search_call.completed
           default: response.web_search_call.completed
-          description: >-
-            Event type identifier, always "response.web_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseWebSearchCallCompleted
-      description: >-
-        Streaming event for completed web search calls.
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+    ListOpenAIResponseInputItem:
       properties:
         data:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseInput'
-          description: List of input items
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  message: '#/components/schemas/OpenAIResponseMessage-Output'
+                  web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseMessage-Output | ... (7 variants)
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Data
         object:
           type: string
           const: list
+          title: Object
           default: list
-          description: Object type identifier, always "list"
-      additionalProperties: false
-      required:
-        - data
-        - object
-      title: ListOpenAIResponseInputItem
-      description: >-
-        List container for OpenAI response input items.
-    RunShieldRequest:
       type: object
+      required:
+      - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldRequest:
       properties:
         shield_id:
           type: string
-          description: The identifier of the shield to run.
+          title: Shield Id
         messages:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIMessageParam'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
             oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
+            - $ref: '#/components/schemas/OpenAIUserMessageParam-Input'
+              title: OpenAIUserMessageParam-Input
+            - $ref: '#/components/schemas/OpenAISystemMessageParam'
+              title: OpenAISystemMessageParam
+            - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+              title: OpenAIAssistantMessageParam-Input
+            - $ref: '#/components/schemas/OpenAIToolMessageParam'
+              title: OpenAIToolMessageParam
+            - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+              title: OpenAIDeveloperMessageParam
+            discriminator:
+              propertyName: role
+              mapping:
+                assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+                developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+                system: '#/components/schemas/OpenAISystemMessageParam'
+                tool: '#/components/schemas/OpenAIToolMessageParam'
+                user: '#/components/schemas/OpenAIUserMessageParam-Input'
+            title: OpenAIUserMessageParam-Input | ... (5 variants)
+          type: array
+          title: Messages
+        params:
+          additionalProperties: true
+          type: object
+          title: Params
+      type: object
       required:
-        - shield_id
-        - messages
-        - params
+      - shield_id
+      - messages
+      - params
       title: RunShieldRequest
     RunShieldResponse:
-      type: object
       properties:
         violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/SafetyViolation'
+            title: SafetyViolation
+          - type: 'null'
+          title: SafetyViolation
+      type: object
       title: RunShieldResponse
       description: Response from running a safety shield.
     SafetyViolation:
-      type: object
       properties:
         violation_level:
           $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
         user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
+          anyOf:
+          - type: string
+          - type: 'null'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
+          title: Metadata
+      type: object
       required:
-        - violation_level
-        - metadata
+      - violation_level
       title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
+      description: Details of a safety violation detected by content moderation.
     ViolationLevel:
       type: string
       enum:
-        - info
-        - warn
-        - error
+      - info
+      - warn
+      - error
       title: ViolationLevel
       description: Severity level of a safety violation.
     AggregationFunctionType:
       type: string
       enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
+      - average
+      - weighted_average
+      - median
+      - categorical_count
+      - accuracy
       title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
+      description: Types of aggregation functions for scoring results.
     ArrayType:
-      type: object
       properties:
         type:
           type: string
           const: array
+          title: Type
           default: array
-          description: Discriminator type. Always "array"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: ArrayType
       description: Parameter type for array values.
     BasicScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: basic
+          title: Type
           default: basic
-          description: >-
-            The type of scoring function parameters, always basic
         aggregation_functions:
-          type: array
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - aggregation_functions
-      title: BasicScoringFnParams
-      description: >-
-        Parameters for basic scoring function configuration.
-    BooleanType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      title: BasicScoringFnParams
+      description: Parameters for basic scoring function configuration.
+    BooleanType:
       properties:
         type:
           type: string
           const: boolean
+          title: Type
           default: boolean
-          description: Discriminator type. Always "boolean"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: BooleanType
       description: Parameter type for boolean values.
     ChatCompletionInputType:
-      type: object
       properties:
         type:
           type: string
           const: chat_completion_input
+          title: Type
           default: chat_completion_input
-          description: >-
-            Discriminator type. Always "chat_completion_input"
-      additionalProperties: false
-      required:
-        - type
-      title: ChatCompletionInputType
-      description: >-
-        Parameter type for chat completion input.
-    CompletionInputType:
       type: object
+      title: ChatCompletionInputType
+      description: Parameter type for chat completion input.
+    CompletionInputType:
       properties:
         type:
           type: string
           const: completion_input
+          title: Type
           default: completion_input
-          description: >-
-            Discriminator type. Always "completion_input"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: CompletionInputType
       description: Parameter type for completion input.
     JsonType:
-      type: object
       properties:
         type:
           type: string
           const: json
+          title: Type
           default: json
-          description: Discriminator type. Always "json"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: JsonType
       description: Parameter type for JSON values.
     LLMAsJudgeScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: llm_as_judge
+          title: Type
           default: llm_as_judge
-          description: >-
-            The type of scoring function parameters, always llm_as_judge
         judge_model:
           type: string
-          description: >-
-            Identifier of the LLM model to use as a judge for scoring
+          title: Judge Model
         prompt_template:
-          type: string
-          description: >-
-            (Optional) Custom prompt template for the judge model
+          anyOf:
+          - type: string
+          - type: 'null'
         judge_score_regexes:
-          type: array
           items:
             type: string
-          description: >-
-            Regexes to extract the answer from generated response
-        aggregation_functions:
           type: array
+          title: Judge Score Regexes
+          description: Regexes to extract the answer from generated response
+        aggregation_functions:
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - judge_model
-        - judge_score_regexes
-        - aggregation_functions
-      title: LLMAsJudgeScoringFnParams
-      description: >-
-        Parameters for LLM-as-judge scoring function configuration.
-    NumberType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      required:
+      - judge_model
+      title: LLMAsJudgeScoringFnParams
+      description: Parameters for LLM-as-judge scoring function configuration.
+    NumberType:
       properties:
         type:
           type: string
           const: number
+          title: Type
           default: number
-          description: Discriminator type. Always "number"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: NumberType
       description: Parameter type for numeric values.
     ObjectType:
-      type: object
       properties:
         type:
           type: string
           const: object
+          title: Type
           default: object
-          description: Discriminator type. Always "object"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: ObjectType
       description: Parameter type for object values.
     RegexParserScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: regex_parser
+          title: Type
           default: regex_parser
-          description: >-
-            The type of scoring function parameters, always regex_parser
         parsing_regexes:
-          type: array
           items:
             type: string
-          description: >-
-            Regex to extract the answer from generated response
-        aggregation_functions:
           type: array
+          title: Parsing Regexes
+          description: Regex to extract the answer from generated response
+        aggregation_functions:
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - parsing_regexes
-        - aggregation_functions
-      title: RegexParserScoringFnParams
-      description: >-
-        Parameters for regex parser scoring function configuration.
-    ScoringFn:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      title: RegexParserScoringFnParams
+      description: Parameters for regex parser scoring function configuration.
+ ScoringFn: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: scoring_function + title: Type default: scoring_function - description: >- - The resource type, always scoring_function description: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata + description: Any additional metadata for this definition return_type: oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants) + description: The return type of the deterministic function discriminator: propertyName: type mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' + boolean: '#/components/schemas/BooleanType' chat_completion_input: '#/components/schemas/ChatCompletionInputType' completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval + type: object required: - - identifier - - provider_id - - type - - metadata - - return_type + - identifier + - provider_id + - return_type title: ScoringFn - description: >- - A scoring function resource for evaluating model outputs. + description: A scoring function resource for evaluating model outputs. ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' discriminator: - propertyName: type mapping: + basic: '#/components/schemas/BasicScoringFnParams' llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams ScoringFnParamsType: - type: string + description: Types of scoring function parameter configurations. enum: - - llm_as_judge - - regex_parser - - basic + - llm_as_judge + - regex_parser + - basic title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. + type: string StringType: - type: object properties: type: type: string const: string + title: Type default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type + type: object title: StringType description: Parameter type for string values. 
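As a reading aid for the regenerated `ScoringFn` resource above (only `identifier`, `provider_id`, and `return_type` are now required), a minimal conforming instance could look like this; the identifier and provider values are made up:

```yaml
# Illustrative ScoringFn resource
identifier: basic::equality       # invented identifier
provider_id: basic                # invented provider
type: scoring_function
description: Exact-match scoring
metadata: {}
return_type:
  type: string                    # discriminator selects StringType
```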
UnionType: - type: object properties: type: type: string const: union + title: Type default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type + type: object title: UnionType description: Parameter type for union values. ListScoringFunctionsResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/ScoringFn' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListScoringFunctionsResponse ScoreRequest: - type: object properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. + type: array + title: Input Rows scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + type: object required: - - input_rows - - scoring_functions + - input_rows + - scoring_functions title: ScoreRequest ScoreResponse: - type: object properties: results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false + type: object + title: Results + type: object required: - - results + - results title: ScoreResponse description: The response from scoring. ScoringResult: - type: object properties: score_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. + type: array + title: Score Rows aggregated_results: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false + title: Aggregated Results + type: object required: - - score_rows - - aggregated_results + - score_rows + - aggregated_results title: ScoringResult description: A scoring result for a single row. ScoreBatchRequest: - type: object properties: dataset_id: type: string - description: The ID of the dataset to score. + title: Dataset Id scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. 
+ anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions save_results_dataset: type: boolean - description: >- - Whether to save the results to a dataset. - additionalProperties: false + title: Save Results Dataset + default: false + type: object required: - - dataset_id - - scoring_functions - - save_results_dataset + - dataset_id + - scoring_functions title: ScoreBatchRequest ScoreBatchResponse: - type: object properties: dataset_id: - type: string - description: >- - (Optional) The identifier of the dataset that was scored + anyOf: + - type: string + - type: 'null' results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult - additionalProperties: false - required: - - results - title: ScoreBatchResponse - description: >- - Response from batch scoring operations on datasets. - Shield: + type: object + title: Results type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. + Shield: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: shield + title: Type default: shield - description: The resource type, always shield params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. - ListShieldsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: Shield + description: A safety shield resource that can be used to check content. + ListShieldsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Shield' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListShieldsResponse InvokeToolRequest: - type: object properties: tool_name: type: string - description: The name of the tool to invoke. 
+ title: Tool Name kwargs: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool. + title: Kwargs authorization: - type: string - description: >- - (Optional) OAuth access token for authenticating with the MCP server. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - tool_name - - kwargs + - tool_name + - kwargs title: InvokeToolRequest ImageContentItem: - type: object + description: A image content item properties: type: - type: string const: image default: image - description: >- - Discriminator type of the content item. Always "image" + title: Type + type: string image: - type: object - properties: - url: - $ref: '#/components/schemas/URL' - description: >- - A URL of the image or data URL in the format of data:image/{type};base64,{data}. - Note that URL could have length limits. - data: - type: string - contentEncoding: base64 - description: base64 encoded image data as string - additionalProperties: false - description: >- - Image as a base64 encoded string or an URL - additionalProperties: false + $ref: '#/components/schemas/_URLOrData' required: - - type - - image + - image title: ImageContentItem - description: A image content item + type: object InterleavedContent: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - InterleavedContentItem: - oneOf: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + InterleavedContentItem: discriminator: - propertyName: type mapping: image: '#/components/schemas/ImageContentItem' text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem TextContentItem: - type: object properties: type: type: string const: text + title: Type default: text - description: >- - Discriminator type of the content item. 
Always "text" text: type: string - description: Text content - additionalProperties: false + title: Text + type: object required: - - type - - text + - text title: TextContentItem description: A text content item ToolInvocationResult: - type: object properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed + anyOf: + - type: string + - type: 'null' error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed + anyOf: + - type: integer + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object title: ToolInvocationResult description: Result of a tool invocation. URL: - type: object properties: uri: type: string - description: The URL string pointing to the resource - additionalProperties: false + title: Uri + type: object required: - - uri + - uri title: URL description: A URL reference to external content. 
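The `InterleavedContent` union above now accepts either a bare string or a list of discriminated items. Here is a sketch of both accepted forms, assuming the new `_URLOrData` ref keeps the `url`/`data` fields of the old inline image definition (the URL is illustrative):

```yaml
# Form 1: a bare string
content: "Describe the attached diagram."
---
# Form 2: a list of typed items, discriminated on `type`
content:
  - type: text
    text: "Describe the attached diagram."
  - type: image
    image:                        # assuming _URLOrData keeps url/data
      url:
        uri: https://example.com/diagram.png   # illustrative URL
```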
ToolDef: - type: object properties: toolgroup_id: - type: string - description: >- - (Optional) ID of the tool group this tool belongs to + anyOf: + - type: string + - type: 'null' name: type: string - description: Name of the tool + title: Name description: - type: string - description: >- - (Optional) Human-readable description of what the tool does + anyOf: + - type: string + - type: 'null' input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool inputs (MCP inputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' output_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool outputs (MCP outputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - name - title: ToolDef - description: >- - Tool definition used in runtime contexts. - ListToolDefsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false - required: - - data - title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - ToolGroup: + type: array + title: Data type: object + required: + - data + title: ListToolDefsResponse + description: Response containing a list of tool definitions. + ToolGroup: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: tool_group + title: Type default: tool_group - description: Type of resource, always 'tool_group' mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - ListToolGroupsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. 
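Since `ToolDef` above now treats everything but `name` as nullable, a minimal tool definition might look like the following; the tool name and schema are invented for illustration:

```yaml
# Illustrative ToolDef instance
name: get_weather                 # invented tool name
description: Look up current weather for a city
input_schema:                     # free-form JSON Schema (MCP inputSchema)
  type: object
  properties:
    city:
      type: string
  required: [city]
```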
+ ListToolGroupsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false - required: - - data - title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - Chunk: + type: array + title: Data type: object + required: + - data + title: ListToolGroupsResponse + description: Response containing a list of tool groups. + Chunk: + description: A chunk of content that can be inserted into a vector database. properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - chunk_id: - type: string - description: >- - Unique identifier for the chunk. Must be provided explicitly. - metadata: - type: object - additionalProperties: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + chunk_id: + title: Chunk Id + type: string + metadata: + additionalProperties: true + title: Metadata + type: object embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + nullable: true + title: ChunkMetadata required: - - content - - chunk_id - - metadata + - content + - chunk_id title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: type: object + ChunkMetadata: properties: chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. + anyOf: + - type: string + - type: 'null' document_id: - type: string - description: >- - The ID of the document this chunk belongs to. + anyOf: + - type: string + - type: 'null' source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. + anyOf: + - type: string + - type: 'null' created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. 
+ anyOf: + - type: integer + - type: 'null' updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. + anyOf: + - type: integer + - type: 'null' chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. + anyOf: + - type: string + - type: 'null' chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. + anyOf: + - type: string + - type: 'null' chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. + anyOf: + - type: string + - type: 'null' chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. + anyOf: + - type: integer + - type: 'null' content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. + anyOf: + - type: integer + - type: 'null' metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. - additionalProperties: false - title: ChunkMetadata - description: >- - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional - information about the chunk that will not be used in the context during - inference, but is required for backend functionality. The `ChunkMetadata` is - set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not - expected to change after. Use `Chunk.metadata` for metadata that will - be used in the context during inference. - InsertChunksRequest: + anyOf: + - type: integer + - type: 'null' type: object + title: ChunkMetadata + description: |- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + InsertChunksRequest: properties: vector_store_id: type: string - description: >- - The identifier of the vector database to insert the chunks into. + title: Vector Store Id chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - The chunks to insert. Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` - and `embedding`: `List[float]` are optional. If `metadata` is provided, - you configure how Llama Stack formats the chunk during generation. If - `embedding` is not provided, it will be computed later. + $ref: '#/components/schemas/Chunk-Input' + type: array + title: Chunks ttl_seconds: - type: integer - description: The time to live of the chunks. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - vector_store_id - - chunks + - vector_store_id + - chunks title: InsertChunksRequest QueryChunksRequest: - type: object properties: vector_store_id: type: string - description: >- - The identifier of the vector database to query. + title: Vector Store Id query: - $ref: '#/components/schemas/InterleavedContent' - description: The query to search for. 
+ anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the query. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - vector_store_id - - query + - vector_store_id + - query title: QueryChunksRequest QueryChunksResponse: - type: object properties: chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: + $ref: '#/components/schemas/Chunk-Output' type: array + title: Chunks + scores: items: type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - VectorStoreFileCounts: + type: array + title: Scores type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: properties: completed: type: integer - description: >- - Number of files that have been successfully processed + title: Completed cancelled: type: integer - description: >- - Number of files that had their processing cancelled + title: Cancelled failed: type: integer - description: Number of files that failed to process + title: Failed in_progress: type: integer - description: >- - Number of files currently being processed + title: In Progress total: type: integer - description: >- - Total number of files in the vector store - additionalProperties: false - required: - - completed - - cancelled - - failed - - in_progress - - total - title: VectorStoreFileCounts - description: >- - File processing status counts for a vector store. - VectorStoreListResponse: + title: Total type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. 
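Pulling the `Chunk` and `InsertChunksRequest` shapes above together, a minimal insert payload could look like this sketch (IDs and text are illustrative):

```yaml
# Illustrative InsertChunksRequest body
vector_store_id: vs_123           # invented ID
chunks:
  - chunk_id: chunk-0001          # now required on Chunk
    content: "Llama Stack chunks can carry plain text."
    metadata:
      document_id: doc-42         # free-form; surfaced in model context
ttl_seconds: 3600
```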
+ VectorStoreListResponse: properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreObject' - description: List of vector store objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first vector store in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last vector store in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more vector stores available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreListResponse description: Response from listing vector stores. VectorStoreObject: - type: object properties: id: type: string - description: Unique identifier for the vector store + title: Id object: type: string + title: Object default: vector_store - description: >- - Object type identifier, always "vector_store" created_at: type: integer - description: >- - Timestamp when the vector store was created + title: Created At name: - type: string - description: (Optional) Name of the vector store + anyOf: + - type: string + - type: 'null' usage_bytes: type: integer + title: Usage Bytes default: 0 - description: >- - Storage space used by the vector store in bytes file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the vector store status: type: string + title: Status default: completed - description: Current status of the vector store expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' expires_at: - type: integer - description: >- - (Optional) Timestamp when the vector store will expire + anyOf: + - type: integer + - type: 'null' last_active_at: - type: integer - description: >- - (Optional) Timestamp of last activity on the vector store + anyOf: + - type: integer + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false + title: Metadata + type: object required: - - id - - object - - created_at - - usage_bytes - - file_counts - - status - - metadata + - id + - created_at + - file_counts title: VectorStoreObject description: OpenAI Vector Store object. 
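For orientation, a `VectorStoreObject` satisfying the new required set (`id`, `created_at`, `file_counts`) might look like this; IDs and counts are made up:

```yaml
# Illustrative VectorStoreObject
id: vs_123
object: vector_store
created_at: 1731024000
name: product-docs
status: completed
usage_bytes: 0
file_counts:
  completed: 2
  cancelled: 0
  failed: 0
  in_progress: 0
  total: 2
metadata: {}
```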
VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' discriminator: - propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic VectorStoreChunkingStrategyAuto: - type: object properties: type: type: string const: auto + title: Type default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: properties: type: type: string const: static + title: Type default: static - description: >- - Strategy type, always "static" for static chunking static: $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: properties: chunk_overlap_tokens: type: integer + title: Chunk Overlap Tokens default: 400 - description: >- - Number of tokens to overlap between adjacent chunks max_chunk_size_tokens: type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens + type: object title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - "OpenAICreateVectorStoreRequestWithExtraBody": - type: object + description: Configuration for static chunking strategy. 
+ OpenAICreateVectorStoreRequestWithExtraBody: properties: name: - type: string - description: (Optional) A name for the vector store + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store + anyOf: + - items: + type: string + type: array + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) Strategy for splitting files into chunks + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: properties: name: - type: string - description: The name of the vector store. + anyOf: + - type: string + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object title: OpenaiUpdateVectorStoreRequest VectorStoreDeleteResponse: - type: object properties: id: type: string - description: >- - Unique identifier of the deleted vector store + title: Id object: type: string + title: Object default: vector_store.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: VectorStoreDeleteResponse description: Response from deleting a vector store. 
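A hedged sketch of `OpenAICreateVectorStoreRequestWithExtraBody`: since the schema now sets `additionalProperties: true`, unknown keys pass through as extra_body. The `embedding_model` key below is hypothetical and only illustrates that passthrough; the static chunking values respect the documented 100 to 4096 token bound:

```yaml
# Illustrative create-vector-store request
name: product-docs
file_ids:
  - file-abc123                   # invented file ID
chunking_strategy:
  type: static
  static:
    max_chunk_size_tokens: 512    # must fall in 100..4096
    chunk_overlap_tokens: 100
embedding_model: all-MiniLM-L6-v2 # hypothetical extra key, passed via extra_body
```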
- "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: properties: file_ids: - type: array items: type: string - description: >- - A list of File IDs that the vector store should use + type: array + title: File Ids attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. 
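A minimal `VectorStoreFileBatchObject` conforming to the inlined status enum above might look like this (IDs and counts illustrative):

```yaml
# Illustrative file-batch object
id: vsfb_001                      # invented batch ID
object: vector_store.file_batch
created_at: 1731024000
vector_store_id: vs_123
status: in_progress               # one of completed|in_progress|cancelled|failed
file_counts:
  completed: 1
  cancelled: 0
  failed: 0
  in_progress: 3
  total: 4
```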
VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + title: Attributes chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. 
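Likewise, here is a sketch of a `VectorStoreFileObject` meeting the trimmed required set (`id`, `chunking_strategy`, `created_at`, `status`, `vector_store_id`); all values are illustrative:

```yaml
# Illustrative vector store file object
id: file-abc123
object: vector_store.file
created_at: 1731024000
vector_store_id: vs_123
status: completed
chunking_strategy:
  type: auto
usage_bytes: 2048
attributes: {}
```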
VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. 
- additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. - bool: - type: boolean - VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content + title: Text embedding: - type: array - items: - type: number - description: >- - Optional embedding vector for this content chunk + anyOf: + - items: + type: number + type: array + - type: 'null' chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: Optional chunk metadata + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Optional user-defined metadata - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. 
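A minimal `VectorStoreContent` item per the schema above, with illustrative text and optional user-defined metadata:

```yaml
# Illustrative content item
type: text
text: "Refunds are issued within 14 days of purchase."   # invented text
metadata:
  document_id: doc-42             # optional, user-defined
```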
+ VectorStoreFileContentResponse: properties: object: type: string const: vector_store.file_content.page + title: Object default: vector_store.file_content.page - description: >- - The object type, which is always `vector_store.file_content.page` data: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: Parsed content of the file + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Indicates if there are more content pages to fetch next_page: - type: string - description: The token for the next page, if any - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreFileContentResponse - description: >- - Represents the parsed content of a vector store file. - OpenaiSearchVectorStoreRequest: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. + OpenaiSearchVectorStoreRequest: properties: query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: + anyOf: + - type: string + - items: type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
+ type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) + anyOf: + - type: boolean + - type: 'null' + default: false search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false + anyOf: + - type: string + - type: 'null' + default: vector + type: object required: - - query + - query title: OpenaiSearchVectorStoreRequest VectorStoreSearchResponse: - type: object properties: file_id: type: string - description: >- - Unique identifier of the file containing the result + title: File Id filename: type: string - description: Name of the file containing the result + title: Filename score: type: number - description: Relevance score for this search result + title: Score attributes: - type: object - additionalProperties: - oneOf: + anyOf: + - additionalProperties: + anyOf: - type: string - type: number - type: boolean - description: >- - (Optional) Key-value attributes associated with the file + title: string | number | boolean + type: object + - type: 'null' content: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false + type: array + title: Content + type: object required: - - file_id - - filename - - score - - content + - file_id + - filename + - score + - content title: VectorStoreSearchResponse description: Response from searching a vector store. VectorStoreSearchResponsePage: - type: object properties: object: type: string + title: Object default: vector_store.search_results.page - description: >- - Object type identifier for the search results page search_query: - type: array items: type: string - description: >- - The original search query that was executed - data: type: array + title: Search Query + data: items: $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - VersionInfo: + anyOf: + - type: string + - type: 'null' type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: properties: version: type: string - description: Version number of the service - additionalProperties: false + title: Version + type: object required: - - version + - version title: VersionInfo description: Version information for the service. 
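Tying the two search schemas above together, a `VectorStoreSearchResponsePage` instance might look like this sketch (query, filename, and score are made up):

```yaml
# Illustrative search results page
object: vector_store.search_results.page
search_query:
  - "refund policy"               # invented query
data:
  - file_id: file-abc123
    filename: policies.md
    score: 0.87
    content:
      - type: text
        text: "Refunds are issued within 14 days of purchase."
has_more: false
```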
AppendRowsRequest: - type: object properties: rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. - additionalProperties: false + type: array + title: Rows + type: object required: - - rows + - rows title: AppendRowsRequest PaginatedResponse: - type: object properties: data: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more items available after this set + title: Has More url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Dataset: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. + Dataset: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: dataset + title: Type default: dataset - description: >- - Type of resource, always 'dataset' for datasets purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use + $ref: '#/components/schemas/DatasetPurpose' source: oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource discriminator: propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset + uri: '#/components/schemas/URIDataSource' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: + title: Metadata + description: Any additional metadata for this dataset type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. 
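Here is a minimal `Dataset` resource under the new required set (`identifier`, `provider_id`, `purpose`, `source`); the identifier and provider are invented, while the purpose and URI values come from the enum and example shown in this spec:

```yaml
# Illustrative Dataset resource
identifier: my-eval-set           # invented identifier
provider_id: localfs              # invented provider
type: dataset
purpose: eval/question-answer     # a DatasetPurpose value from the spec
source:
  type: uri
  uri: https://mywebsite.com/mydata.jsonl   # example URI from the spec
metadata: {}
```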
+ RowsDataSource: properties: type: type: string const: rows + title: Type default: rows rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false + type: array + title: Rows + type: object required: - - type - - rows + - rows title: RowsDataSource description: A dataset stored in rows. URIDataSource: - type: object properties: type: type: string const: uri + title: Type default: uri uri: type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. - ListDatasetsResponse: + title: Uri type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListDatasetsResponse description: Response from listing datasets. Benchmark: - type: object properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: benchmark + title: Type default: benchmark - description: The resource type, always benchmark dataset_id: type: string - description: >- - Identifier of the dataset to use for the benchmark evaluation + title: Dataset Id scoring_functions: - type: array items: type: string - description: >- - List of scoring function identifiers to apply during evaluation + type: array + title: Scoring Functions metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Metadata for this evaluation task - additionalProperties: false - required: - - identifier - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - description: >- - A benchmark resource for evaluating model performance. - ListBenchmarksResponse: type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. + ListBenchmarksResponse: properties: data: - type: array items: $ref: '#/components/schemas/Benchmark' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListBenchmarksResponse BenchmarkConfig: - type: object properties: eval_candidate: $ref: '#/components/schemas/ModelCandidate' - description: The candidate to evaluate. 
scoring_params:
- type: object
 additionalProperties:
- $ref: '#/components/schemas/ScoringFnParams'
- description: >-
- Map between scoring function id and parameters for each scoring function
- you want to run
+ oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ title: LLMAsJudgeScoringFnParams
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
+ title: RegexParserScoringFnParams
+ - $ref: '#/components/schemas/BasicScoringFnParams'
+ title: BasicScoringFnParams
+ discriminator:
+ propertyName: type
+ mapping:
+ basic: '#/components/schemas/BasicScoringFnParams'
+ llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+ title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+ type: object
+ title: Scoring Params
+ description: Map between scoring function id and parameters for each scoring function you want to run
 num_examples:
- type: integer
- description: >-
- (Optional) The number of examples to evaluate. If not provided, all examples
- in the dataset will be evaluated
- additionalProperties: false
- required:
- - eval_candidate
- - scoring_params
- title: BenchmarkConfig
- description: >-
- A benchmark configuration for evaluation.
- GreedySamplingStrategy:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ description: Number of examples to evaluate (useful for testing); if not provided, all examples in the dataset will be evaluated
 type: object
+ required:
+ - eval_candidate
+ title: BenchmarkConfig
+ description: A benchmark configuration for evaluation.
+ GreedySamplingStrategy:
 properties:
 type:
 type: string
 const: greedy
+ title: Type
 default: greedy
- description: >-
- Must be "greedy" to identify this sampling strategy
- additionalProperties: false
- required:
- - type
- title: GreedySamplingStrategy
- description: >-
- Greedy sampling strategy that selects the highest probability token at each
- step.
- ModelCandidate:
 type: object
+ title: GreedySamplingStrategy
+ description: Greedy sampling strategy that selects the highest probability token at each step.
+ ModelCandidate:
 properties:
 type:
 type: string
 const: model
+ title: Type
 default: model
 model:
 type: string
- description: The model ID to evaluate.
+ title: Model
 sampling_params:
 $ref: '#/components/schemas/SamplingParams'
- description: The sampling parameters for the model.
 system_message:
- $ref: '#/components/schemas/SystemMessage'
- description: >-
- (Optional) The system message providing instructions or context to the
- model.
- additionalProperties: false
+ anyOf:
+ - $ref: '#/components/schemas/SystemMessage'
+ title: SystemMessage
+ - type: 'null'
+ title: SystemMessage
+ type: object
 required:
- - type
- - model
- - sampling_params
+ - model
+ - sampling_params
 title: ModelCandidate
 description: A model candidate for evaluation.
SamplingParams: - type: object properties: strategy: oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy discriminator: propertyName: type mapping: greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' top_k: '#/components/schemas/TopKSamplingStrategy' - description: The sampling strategy. + top_p: '#/components/schemas/TopPSamplingStrategy' max_tokens: - type: integer - description: >- - The maximum number of tokens that can be generated in the completion. - The token count of your prompt plus max_tokens cannot exceed the model's - context length. + anyOf: + - type: integer + - type: 'null' repetition_penalty: - type: number + anyOf: + - type: number + - type: 'null' default: 1.0 - description: >- - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's likelihood - to talk about new topics. stop: - type: array - items: - type: string - description: >- - Up to 4 sequences where the API will stop generating further tokens. The - returned text will not contain the stop sequence. - additionalProperties: false - required: - - strategy + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object title: SamplingParams description: Sampling parameters. SystemMessage: - type: object properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. - TopKSamplingStrategy: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. 
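+ # Illustrative annotation: a SamplingParams value using the top_p strategy
+ # from the union above (the strategy schemas follow below); values are hypothetical.
+ #   strategy:
+ #     type: top_p
+ #     temperature: 0.7
+ #     top_p: 0.95
+ #   max_tokens: 512
+ #   stop: ["</answer>"]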
+ TopKSamplingStrategy: properties: type: type: string const: top_k + title: Type default: top_k - description: >- - Must be "top_k" to identify this sampling strategy top_k: type: integer - description: >- - Number of top tokens to consider for sampling. Must be at least 1 - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - description: >- - Top-k sampling strategy that restricts sampling to the k most likely tokens. - TopPSamplingStrategy: + minimum: 1.0 + title: Top K type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: properties: type: type: string const: top_p + title: Type default: top_p - description: >- - Must be "top_p" to identify this sampling strategy temperature: - type: number - description: >- - Controls randomness in sampling. Higher values increase randomness + anyOf: + - type: number + minimum: 0.0 + - type: 'null' top_p: - type: number + anyOf: + - type: number + - type: 'null' default: 0.95 - description: >- - Cumulative probability threshold for nucleus sampling. Defaults to 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - description: >- - Top-p (nucleus) sampling strategy that samples from the smallest set of tokens - with cumulative probability >= p. - EvaluateRowsRequest: type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: type: array + title: Input Rows + scoring_functions: items: type: string - description: >- - The scoring functions to use for the evaluation. + type: array + title: Scoring Functions benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - input_rows - - scoring_functions - - benchmark_config + - input_rows + - scoring_functions + - benchmark_config title: EvaluateRowsRequest EvaluateResponse: - type: object properties: generations: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. + type: array + title: Generations scores: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false + type: object + title: Scores + type: object required: - - generations - - scores + - generations + - scores title: EvaluateResponse description: The response from an evaluation. - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. 
- additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest Job: - type: object properties: job_id: type: string - description: Unique identifier for the job + title: Job Id status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - RerankRequest: + $ref: '#/components/schemas/JobStatus' type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankRequest: properties: model: type: string - description: >- - The identifier of the reranking model to use. + title: Model query: - oneOf: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: - type: string - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam type: array - items: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. + title: Items max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - model - - query - - items + - model + - query + - items title: RerankRequest RerankData: - type: object properties: index: type: integer - description: >- - The original index of the document in the input list + title: Index relevance_score: type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Relevance Score type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: properties: data: - type: array items: $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: RerankResponse description: Response from a reranking request. 
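+ # Illustrative annotation: a minimal RerankRequest (the model id and items are
+ # hypothetical). Per the description above, the response lists RerankData
+ # sorted by relevance score in descending order.
+ #   model: "my-reranker"
+ #   query: "which document covers GPUs?"
+ #   items:
+ #     - "A note about CPU scheduling"
+ #     - "A guide to GPU inference"
+ #   max_num_results: 1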
Checkpoint: - type: object properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. 
- additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. + description: Configuration for Direct Preference Optimization (DPO) alignment. 
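+ # Illustrative annotation: a DPOAlignmentConfig payload (the beta value is
+ # hypothetical; loss types are enumerated in DPOLossType below).
+ #   beta: 0.1
+ #   loss_type: sigmoid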
DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. + description: Configuration parameters for the optimization algorithm. 
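+ # Illustrative annotation: DataConfig and OptimizerConfig fragments as they
+ # might appear inside a TrainingConfig (ids and values are hypothetical).
+ #   data_config:
+ #     dataset_id: my-dataset
+ #     batch_size: 8
+ #     shuffle: true
+ #     data_format: instruct
+ #   optimizer_config:
+ #     optimizer_type: adamw
+ #     lr: 0.0001
+ #     weight_decay: 0.01
+ #     num_warmup_steps: 100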
OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
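+ # Illustrative annotation: an algorithm_config using the LoRA variant of the
+ # union above (the attention module names are hypothetical).
+ #   algorithm_config:
+ #     type: LoRA
+ #     lora_attn_modules: ["q_proj", "v_proj"]
+ #     apply_lora_to_mlp: false
+ #     apply_lora_to_output: false
+ #     rank: 8
+ #     alpha: 16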
+ SupervisedFineTuneRequest:
 properties:
 job_uuid:
 type: string
- description: The UUID of the job to create.
+ title: Job Uuid
 training_config:
 $ref: '#/components/schemas/TrainingConfig'
- description: The training configuration.
 hyperparam_search_config:
+ additionalProperties: true
 type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: The hyperparam search configuration.
+ title: Hyperparam Search Config
 logger_config:
+ additionalProperties: true
 type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: The logger configuration.
+ title: Logger Config
 model:
- type: string
- description: The model to fine-tune.
+ anyOf:
+ - type: string
+ - type: 'null'
+ description: Model descriptor for training if not in provider config
 checkpoint_dir:
- type: string
- description: The directory to save checkpoint(s) to.
+ anyOf:
+ - type: string
+ - type: 'null'
 algorithm_config:
- $ref: '#/components/schemas/AlgorithmConfig'
- description: The algorithm configuration.
+ anyOf:
+ - oneOf:
+ - $ref: '#/components/schemas/LoraFinetuningConfig'
+ title: LoraFinetuningConfig
+ - $ref: '#/components/schemas/QATFinetuningConfig'
+ title: QATFinetuningConfig
+ discriminator:
+ propertyName: type
+ mapping:
+ LoRA: '#/components/schemas/LoraFinetuningConfig'
+ QAT: '#/components/schemas/QATFinetuningConfig'
+ title: LoraFinetuningConfig | QATFinetuningConfig
+ - type: 'null'
+ title: Algorithm Config
+ type: object
 required:
- - job_uuid
- - training_config
- - hyperparam_search_config
- - logger_config
+ - job_uuid
+ - training_config
+ - hyperparam_search_config
+ - logger_config
 title: SupervisedFineTuneRequest
 RegisterModelRequest:
- type: object
 properties:
 model_id:
 type: string
- description: The identifier of the model to register.
+ title: Model Id
 provider_model_id:
- type: string
- description: >-
- The identifier of the model in the provider.
+ anyOf:
+ - type: string
+ - type: 'null'
 provider_id:
- type: string
- description: The identifier of the provider.
+ anyOf:
+ - type: string
+ - type: 'null'
 metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: Any additional metadata for this model.
+ anyOf:
+ - additionalProperties: true
+ type: object
+ - type: 'null'
 model_type:
- $ref: '#/components/schemas/ModelType'
- description: The type of model to register.
- additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + title: ModelType + type: object required: - - model_id + - model_id title: RegisterModelRequest ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' discriminator: - propertyName: type mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' + boolean: '#/components/schemas/BooleanType' chat_completion_input: '#/components/schemas/ChatCompletionInputType' completion_input: '#/components/schemas/CompletionInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) RegisterShieldRequest: - type: object properties: shield_id: type: string - description: >- - The identifier of the shield to register. + title: Shield Id provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. + anyOf: + - type: string + - type: 'null' provider_id: - type: string - description: The identifier of the provider. 
+ anyOf: + - type: string + - type: 'null' params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - shield_id + - shield_id title: RegisterShieldRequest RegisterToolGroupRequest: - type: object properties: toolgroup_id: type: string - description: The ID of the tool group to register. + title: Toolgroup Id provider_id: type: string - description: >- - The ID of the provider to use for the tool group. + title: Provider Id mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - toolgroup_id - - provider_id + - toolgroup_id + - provider_id title: RegisterToolGroupRequest DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' discriminator: - propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. 
If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource RegisterBenchmarkRequest: - type: object properties: benchmark_id: type: string - description: The ID of the benchmark to register. + title: Benchmark Id dataset_id: type: string - description: >- - The ID of the dataset to use for the benchmark. + title: Dataset Id scoring_functions: - type: array items: type: string - description: >- - The scoring functions to use for the benchmark. + type: array + title: Scoring Functions provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. + anyOf: + - type: string + - type: 'null' provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. + anyOf: + - type: string + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - benchmark_id - - dataset_id - - scoring_functions + - benchmark_id + - dataset_id + - scoring_functions title: RegisterBenchmarkRequest + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. 
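+ # Illustrative annotation: an ApprovalFilter that always requires approval for
+ # one MCP tool and never for another (tool names are hypothetical).
+ #   always: ["create_issue"]
+ #   never: ["list_issues"]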
+ BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
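+ # Illustrative annotation: a minimal Chunk payload matching the schema above
+ # (the chunk id and metadata keys are hypothetical).
+ #   content: "Llama Stack supports pluggable providers."
+ #   chunk_id: "chunk-0001"
+ #   metadata:
+ #     document_id: "doc-42"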
+ Chunk-Output:
+ properties:
+ content:
+ anyOf:
+ - type: string
+ - oneOf:
+ - $ref: '#/components/schemas/ImageContentItem-Output'
+ title: ImageContentItem-Output
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ discriminator:
+ propertyName: type
+ mapping:
+ image: '#/components/schemas/ImageContentItem-Output'
+ text: '#/components/schemas/TextContentItem'
+ title: ImageContentItem-Output | TextContentItem
+ - items:
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem-Output'
+ title: ImageContentItem-Output
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ discriminator:
+ propertyName: type
+ mapping:
+ image: '#/components/schemas/ImageContentItem-Output'
+ text: '#/components/schemas/TextContentItem'
+ title: ImageContentItem-Output | TextContentItem
+ type: array
+ title: list[ImageContentItem-Output | TextContentItem]
+ title: string | list[ImageContentItem-Output | TextContentItem]
+ chunk_id:
+ type: string
+ title: Chunk Id
+ metadata:
+ additionalProperties: true
+ type: object
+ title: Metadata
+ embedding:
+ anyOf:
+ - items:
+ type: number
+ type: array
+ - type: 'null'
+ chunk_metadata:
+ anyOf:
+ - $ref: '#/components/schemas/ChunkMetadata'
+ title: ChunkMetadata
+ - type: 'null'
+ title: ChunkMetadata
+ type: object
+ required:
+ - content
+ - chunk_id
+ title: Chunk
+ description: A chunk of content that can be inserted into a vector database.
+ ConversationItemInclude:
+ type: string
+ enum:
+ - web_search_call.action.sources
+ - code_interpreter_call.outputs
+ - computer_call_output.output.image_url
+ - file_search_call.results
+ - message.input_image.image_url
+ - message.output_text.logprobs
+ - reasoning.encrypted_content
+ title: ConversationItemInclude
+ description: Specify additional output data to include in the model response.
+ DatasetPurpose:
+ type: string
+ enum:
+ - post-training/messages
+ - eval/question-answer
+ - eval/messages-answer
+ title: DatasetPurpose
+ description: Purpose of the dataset. Each purpose has a required input data schema.
+ Errors:
+ properties:
+ data:
+ anyOf:
+ - items:
+ $ref: '#/components/schemas/BatchError'
+ type: array
+ - type: 'null'
+ object:
+ anyOf:
+ - type: string
+ - type: 'null'
+ additionalProperties: true
+ type: object
+ title: Errors
+ HealthStatus:
+ type: string
+ enum:
+ - OK
+ - Error
+ - Not Implemented
+ title: HealthStatus
+ ImageContentItem-Input:
+ properties:
+ type:
+ type: string
+ const: image
+ title: Type
+ default: image
+ image:
+ $ref: '#/components/schemas/_URLOrData'
+ type: object
+ required:
+ - image
+ title: ImageContentItem
+ description: An image content item
+ ImageContentItem-Output:
+ properties:
+ type:
+ type: string
+ const: image
+ title: Type
+ default: image
+ image:
+ $ref: '#/components/schemas/_URLOrData'
+ type: object
+ required:
+ - image
+ title: ImageContentItem
+ description: An image content item
+ InputTokensDetails:
+ properties:
+ cached_tokens:
+ type: integer
+ title: Cached Tokens
+ additionalProperties: true
+ type: object
+ required:
+ - cached_tokens
+ title: InputTokensDetails
+ JobStatus:
+ type: string
+ enum:
+ - completed
+ - in_progress
+ - failed
+ - scheduled
+ - cancelled
+ title: JobStatus
+ description: Status of a job execution.
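+ # Illustrative annotation: an ImageContentItem referencing a remote image. The
+ # nested field layout is an assumption based on _URLOrData, defined later in
+ # this file; the URL value is hypothetical.
+ #   type: image
+ #   image:
+ #     url:
+ #       uri: "https://example.com/diagram.png"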
+ MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. 
+ OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
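+ # Illustrative annotation: an OpenAIResponseTextFormat requesting structured
+ # output via a JSON schema (the name and schema are hypothetical).
+ #   type: json_schema
+ #   name: weather_report
+ #   schema:
+ #     type: object
+ #     properties:
+ #       temperature: {type: number}
+ #   strict: true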
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
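As with `OpenAIResponseMessage`, the user-message parameter above is emitted twice (`-Input`/`-Output`) with identical bodies. A sketch of a conforming value: the part discriminator values (`text`, `image_url`, `file`) come from the mapping in the schema, while the nested part fields are assumptions based on OpenAI chat-completions conventions, since those part schemas are defined elsewhere in the spec:

```python
# "content" is required and may be a plain string or a list of typed parts.
user_message = {
    "role": "user",  # const "user"; also the default
    "content": [
        {"type": "text", "text": "Describe this picture."},  # nested fields assumed
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},  # nested fields assumed
    ],
    "name": "alice",  # optional: string | null
}
```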
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + RegisterDatasetRequestLoose: + properties: + purpose: + title: Purpose + source: + title: Source + metadata: + title: Metadata + dataset_id: + title: Dataset Id + type: object + required: + - purpose + - source + title: RegisterDatasetRequestLoose + RegisterScoringFunctionRequestLoose: + properties: + scoring_fn_id: + title: Scoring Fn Id + description: + title: Description + return_type: + title: Return Type + provider_scoring_fn_id: + title: Provider Scoring Fn Id + provider_id: + title: Provider Id + params: + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequestLoose + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. 
+ properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + SpanEndPayload: + description: Payload for a span end event. + properties: + type: + const: span_end + default: span_end + title: Type + type: string + status: + $ref: '#/components/schemas/SpanStatus' + required: + - status + title: SpanEndPayload + type: object + SpanStartPayload: + description: Payload for a span start event. + properties: + type: + const: span_start + default: span_start + title: Type + type: string + name: + title: Name + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - name + title: SpanStartPayload + type: object + SpanStatus: + description: The status of a span indicating whether it completed successfully or with an error. + enum: + - ok + - error + title: SpanStatus + type: string + StructuredLogPayload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + LogSeverity: + description: The severity level of a log message. + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + type: string + MetricEvent: + description: A metric event containing a measured value. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... 
(4 variants) + type: object + - type: 'null' + type: + const: metric + default: metric + title: Type + type: string + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + title: Unit + type: string + required: + - trace_id + - span_id + - timestamp + - metric + - value + - unit + title: MetricEvent + type: object + StructuredLogEvent: + description: A structured log event containing typed payload data. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: structured_log + default: structured_log + title: Type + type: string + payload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + required: + - trace_id + - span_id + - timestamp + - payload + title: StructuredLogEvent + type: object + UnstructuredLogEvent: + description: An unstructured log event containing a simple text message. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: unstructured_log + default: unstructured_log + title: Type + type: string + message: + title: Message + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + required: + - trace_id + - span_id + - timestamp + - message + - severity + title: UnstructuredLogEvent + type: object + Event: + discriminator: + mapping: + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + propertyName: type + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + title: UnstructuredLogEvent + - $ref: '#/components/schemas/MetricEvent' + title: MetricEvent + - $ref: '#/components/schemas/StructuredLogEvent' + title: StructuredLogEvent + title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. 
+ properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. 
+ properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. 
+ properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: 
OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. + enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. 
+ title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. + properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object + Span: + description: A span representing a single operation within a trace. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + required: + - span_id + - trace_id + - name + - start_time + title: Span + type: object + Trace: + description: A trace representing the complete execution path of a request across multiple operations. 
+ properties: + trace_id: + title: Trace Id + type: string + root_span_id: + title: Root Span Id + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + required: + - trace_id + - root_span_id + - start_time + title: Trace + type: object + EventType: + description: The type of telemetry event being logged. + enum: + - unstructured_log + - structured_log + - metric + title: EventType + type: string + StructuredLogType: + description: The type of structured log event payload. + enum: + - span_start + - span_end + title: StructuredLogType + type: string + EvalTrace: + description: A trace record for evaluation purposes. + properties: + session_id: + title: Session Id + type: string + step: + title: Step + type: string + input: + title: Input + type: string + output: + title: Output + type: string + expected_output: + title: Expected Output + type: string + required: + - session_id + - step + - input + - output + - expected_output + title: EvalTrace + type: object + SpanWithStatus: + description: A span that includes status information. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/SpanStatus' + title: SpanStatus + - type: 'null' + nullable: true + title: SpanStatus + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + type: object + QueryConditionOp: + description: Comparison operators for query conditions. + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + type: string + QueryCondition: + description: A condition for filtering query results. + properties: + key: + title: Key + type: string + op: + $ref: '#/components/schemas/QueryConditionOp' + value: + title: Value + required: + - key + - op + - value + title: QueryCondition + type: object + MetricLabel: + description: A label associated with a metric. + properties: + name: + title: Name + type: string + value: + title: Value + type: string + required: + - name + - value + title: MetricLabel + type: object + MetricDataPoint: + description: A single data point in a metric time series. + properties: + timestamp: + title: Timestamp + type: integer + value: + title: Value + type: number + unit: + title: Unit + type: string + required: + - timestamp + - value + - unit + title: MetricDataPoint + type: object + MetricSeries: + description: A time series of metric data points. 
+ properties: + metric: + title: Metric + type: string + labels: + items: + $ref: '#/components/schemas/MetricLabel' + title: Labels + type: array + values: + items: + $ref: '#/components/schemas/MetricDataPoint' + title: Values + type: array + required: + - metric + - labels + - values + title: MetricSeries + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -11921,8 +13389,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -11930,11 +13397,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -11942,127 +13407,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. - x-displayName: Agents - - name: Batches - description: >- - The API is designed to allow use of openai client libraries for seamless integration. +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - This API provides the following extensions: - - idempotent batch creation + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - Note: This API is currently under active development and may undergo changes. - x-displayName: >- - The Batches API enables efficient processing of multiple requests in a single - operation, particularly useful for processing large datasets, batch evaluation - workflows, and cost-effective inference at scale. 
- - name: Benchmarks - description: '' - - name: Conversations - description: >- - Protocol for conversation management operations. - x-displayName: Conversations - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. - - - This API provides the raw interface to the underlying models. Three kinds of - models are supported: - - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - - - Rerank models: these models reorder the documents based on their relevance - to a query. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: PostTraining (Coming Soon) - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Batches - - Benchmarks - - Conversations - - DatasetIO - - Datasets - - Eval - - Files - - Inference - - Inspect - - Models - - PostTraining (Coming Soon) - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/openapi_generator/README.md b/docs/openapi_generator/README.md deleted file mode 100644 index 85021d911..000000000 --- a/docs/openapi_generator/README.md +++ /dev/null @@ -1 +0,0 @@ -The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility. diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py deleted file mode 100644 index 769db32a7..000000000 --- a/docs/openapi_generator/generate.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described found in the -# LICENSE file in the root directory of this source tree. - -from datetime import datetime -from pathlib import Path -import sys -import fire -import ruamel.yaml as yaml - -from llama_stack_api import LLAMA_STACK_API_V1 # noqa: E402 -from llama_stack.core.stack import LlamaStack # noqa: E402 - -from .pyopenapi.options import Options # noqa: E402 -from .pyopenapi.specification import Info, Server # noqa: E402 -from .pyopenapi.utility import Specification, validate_api # noqa: E402 - - -def str_presenter(dumper, data): - if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith( - "#/components/schemas/" - ): - style = None - else: - style = ">" if "\n" in data or len(data) > 40 else None - return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style) - - -def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: bool = False, combined_spec: bool = False): - """Generate OpenAPI spec with optional stability filtering.""" - - if combined_spec: - # Special case for combined stable + experimental APIs - title_suffix = " - Stable & Experimental APIs" - filename_prefix = "stainless-" - description_suffix = "\n\n**🔗 COMBINED**: This specification includes both stable production-ready APIs and experimental pre-release APIs. Use stable APIs for production deployments and experimental APIs for testing new features." 
- # Use the special "stainless" filter to include stable + experimental APIs - stability_filter = "stainless" - elif stability_filter: - title_suffix = { - "stable": " - Stable APIs" if not main_spec else "", - "experimental": " - Experimental APIs", - "deprecated": " - Deprecated APIs" - }.get(stability_filter, f" - {stability_filter.title()} APIs") - - # Use main spec filename for stable when main_spec=True - if main_spec and stability_filter == "stable": - filename_prefix = "" - else: - filename_prefix = f"{stability_filter}-" - - description_suffix = { - "stable": "\n\n**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.", - "experimental": "\n\n**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before becoming stable.", - "deprecated": "\n\n**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for migration reference only." - }.get(stability_filter, "") - else: - title_suffix = "" - filename_prefix = "" - description_suffix = "" - - spec = Specification( - LlamaStack, - Options( - server=Server(url="http://any-hosted-llama-stack.com"), - info=Info( - title=f"Llama Stack Specification{title_suffix}", - version=LLAMA_STACK_API_V1, - description=f"""This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are tailored to - best leverage Llama Models.{description_suffix}""", - ), - include_standard_error_responses=True, - stability_filter=stability_filter, # Pass the filter to the generator - ), - ) - - yaml_filename = f"{filename_prefix}llama-stack-spec.yaml" - - with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp: - y = yaml.YAML() - y.default_flow_style = False - y.block_seq_indent = 2 - y.map_indent = 2 - y.sequence_indent = 4 - y.sequence_dash_offset = 2 - y.width = 80 - y.allow_unicode = True - y.representer.add_representer(str, str_presenter) - - y.dump( - spec.get_json(), - fp, - ) - -def main(output_dir: str): - output_dir = Path(output_dir) - if not output_dir.exists(): - raise ValueError(f"Directory {output_dir} does not exist") - - # Validate API protocols before generating spec - return_type_errors = validate_api() - if return_type_errors: - print("\nAPI Method Return Type Validation Errors:\n") - for error in return_type_errors: - print(error, file=sys.stderr) - sys.exit(1) - - now = str(datetime.now()) - print(f"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at {now}") - print("") - - # Generate main spec as stable APIs (llama-stack-spec.yaml) - print("Generating main specification (stable APIs)...") - generate_spec(output_dir, "stable", main_spec=True) - - print("Generating other stability-filtered specifications...") - generate_spec(output_dir, "experimental") - generate_spec(output_dir, "deprecated") - - print("Generating combined stable + experimental specification...") - generate_spec(output_dir, combined_spec=True) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/docs/openapi_generator/pyopenapi/README.md b/docs/openapi_generator/pyopenapi/README.md deleted file mode 100644 index 1b5fbce19..000000000 --- a/docs/openapi_generator/pyopenapi/README.md +++ /dev/null @@ -1 +0,0 @@ -This is forked from https://github.com/hunyadi/pyopenapi diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py deleted file mode 100644 index 9b5f76e2a..000000000 --- a/docs/openapi_generator/pyopenapi/generator.py +++ /dev/null @@ -1,1175 +0,0 @@ -# Copyright (c) Meta 
Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import hashlib -import inspect -import ipaddress -import os -import types -import typing -from dataclasses import make_dataclass -from pathlib import Path -from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union - -from fastapi import UploadFile - -from llama_stack_api import ( - Docstring, - Error, - JsonSchemaGenerator, - JsonType, - Schema, - SchemaOptions, - get_schema_identifier, - is_generic_list, - is_type_optional, - is_type_union, - is_unwrapped_body_param, - json_dump_string, - object_to_json, - parse_type, - python_type_to_name, - register_schema, - unwrap_generic_list, - unwrap_optional_type, - unwrap_union_types, -) -from pydantic import BaseModel - -from .operations import ( - EndpointOperation, - get_endpoint_events, - get_endpoint_operations, - HTTPMethod, -) -from .options import * -from .specification import ( - Components, - Document, - Example, - ExampleRef, - ExtraBodyParameter, - MediaType, - Operation, - Parameter, - ParameterLocation, - PathItem, - RequestBody, - Response, - ResponseRef, - SchemaOrRef, - SchemaRef, - Tag, - TagGroup, -) - -register_schema( - ipaddress.IPv4Address, - schema={ - "type": "string", - "format": "ipv4", - "title": "IPv4 address", - "description": "IPv4 address, according to dotted-quad ABNF syntax as defined in RFC 2673, section 3.2.", - }, - examples=["192.0.2.0", "198.51.100.1", "203.0.113.255"], -) - -register_schema( - ipaddress.IPv6Address, - schema={ - "type": "string", - "format": "ipv6", - "title": "IPv6 address", - "description": "IPv6 address, as defined in RFC 2373, section 2.2.", - }, - examples=[ - "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", - "1080:0:0:0:8:800:200C:417A", - "1080::8:800:200C:417A", - "FF01::101", - "::1", - ], -) - - -def http_status_to_string(status_code: HTTPStatusCode) -> str: - "Converts an HTTP status code to a string." - - if isinstance(status_code, HTTPStatus): - return str(status_code.value) - elif isinstance(status_code, int): - return str(status_code) - elif isinstance(status_code, str): - return status_code - else: - raise TypeError("expected: HTTP status code") - - -class SchemaBuilder: - schema_generator: JsonSchemaGenerator - schemas: Dict[str, Schema] - - def __init__(self, schema_generator: JsonSchemaGenerator) -> None: - self.schema_generator = schema_generator - self.schemas = {} - - def classdef_to_schema(self, typ: type) -> Schema: - """ - Converts a type to a JSON schema. - For nested types found in the type hierarchy, adds the type to the schema registry in the OpenAPI specification section `components`. - """ - - type_schema, type_definitions = self.schema_generator.classdef_to_schema(typ) - - # append schema to list of known schemas, to be used in OpenAPI's Components Object section - for ref, schema in type_definitions.items(): - self._add_ref(ref, schema) - - return type_schema - - def classdef_to_named_schema(self, name: str, typ: type) -> Schema: - schema = self.classdef_to_schema(typ) - self._add_ref(name, schema) - return schema - - def classdef_to_ref(self, typ: type) -> SchemaOrRef: - """ - Converts a type to a JSON schema, and if possible, returns a schema reference. - For composite types (such as classes), adds the type to the schema registry in the OpenAPI specification section `components`. 
- """ - - type_schema = self.classdef_to_schema(typ) - if typ is str or typ is int or typ is float: - # represent simple types as themselves - return type_schema - - type_name = get_schema_identifier(typ) - if type_name is not None: - return self._build_ref(type_name, type_schema) - - try: - type_name = python_type_to_name(typ) - return self._build_ref(type_name, type_schema) - except TypeError: - pass - - return type_schema - - def _build_ref(self, type_name: str, type_schema: Schema) -> SchemaRef: - self._add_ref(type_name, type_schema) - return SchemaRef(type_name) - - def _add_ref(self, type_name: str, type_schema: Schema) -> None: - if type_name not in self.schemas: - self.schemas[type_name] = type_schema - - -class ContentBuilder: - schema_builder: SchemaBuilder - schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]] - sample_transformer: Optional[Callable[[JsonType], JsonType]] - - def __init__( - self, - schema_builder: SchemaBuilder, - schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]] = None, - sample_transformer: Optional[Callable[[JsonType], JsonType]] = None, - ) -> None: - self.schema_builder = schema_builder - self.schema_transformer = schema_transformer - self.sample_transformer = sample_transformer - - def build_content( - self, payload_type: type, examples: Optional[List[Any]] = None - ) -> Dict[str, MediaType]: - "Creates the content subtree for a request or response." - - def is_iterator_type(t): - return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t) - - def get_media_type(t): - if is_generic_list(t): - return "application/jsonl" - elif is_iterator_type(t): - return "text/event-stream" - else: - return "application/json" - - if typing.get_origin(payload_type) in (typing.Union, types.UnionType): - media_types = [] - item_types = [] - for x in typing.get_args(payload_type): - media_types.append(get_media_type(x)) - item_types.append(x) - - if len(set(media_types)) == 1: - # all types have the same media type - return {media_types[0]: self.build_media_type(payload_type, examples)} - else: - # different types have different media types - return { - media_type: self.build_media_type(item_type, examples) - for media_type, item_type in zip(media_types, item_types) - } - - if is_generic_list(payload_type): - media_type = "application/jsonl" - item_type = unwrap_generic_list(payload_type) - else: - media_type = "application/json" - item_type = payload_type - - return {media_type: self.build_media_type(item_type, examples)} - - def build_media_type( - self, item_type: type, examples: Optional[List[Any]] = None - ) -> MediaType: - schema = self.schema_builder.classdef_to_ref(item_type) - if self.schema_transformer: - schema_transformer: Callable[[SchemaOrRef], SchemaOrRef] = ( - self.schema_transformer - ) - schema = schema_transformer(schema) - - if not examples: - return MediaType(schema=schema) - - if len(examples) == 1: - return MediaType(schema=schema, example=self._build_example(examples[0])) - - return MediaType( - schema=schema, - examples=self._build_examples(examples), - ) - - def _build_examples( - self, examples: List[Any] - ) -> Dict[str, Union[Example, ExampleRef]]: - "Creates a set of several examples for a media type." 
- - if self.sample_transformer: - sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore - else: - sample_transformer = lambda sample: sample - - results: Dict[str, Union[Example, ExampleRef]] = {} - for example in examples: - value = sample_transformer(object_to_json(example)) - - hash_string = ( - hashlib.sha256(json_dump_string(value).encode("utf-8")) - .digest() - .hex()[:16] - ) - name = f"ex-{hash_string}" - - results[name] = Example(value=value) - - return results - - def _build_example(self, example: Any) -> Any: - "Creates a single example for a media type." - - if self.sample_transformer: - sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore - else: - sample_transformer = lambda sample: sample - - return sample_transformer(object_to_json(example)) - - -@dataclass -class ResponseOptions: - """ - Configuration options for building a response for an operation. - - :param type_descriptions: Maps each response type to a textual description (if available). - :param examples: A list of response examples. - :param status_catalog: Maps each response type to an HTTP status code. - :param default_status_code: HTTP status code assigned to responses that have no mapping. - """ - - type_descriptions: Dict[type, str] - examples: Optional[List[Any]] - status_catalog: Dict[type, HTTPStatusCode] - default_status_code: HTTPStatusCode - - -@dataclass -class StatusResponse: - status_code: str - types: List[type] = dataclasses.field(default_factory=list) - examples: List[Any] = dataclasses.field(default_factory=list) - - -def create_docstring_for_request( - request_name: str, fields: List[Tuple[str, type, Any]], doc_params: Dict[str, str] -) -> str: - """Creates a ReST-style docstring for a dynamically generated request dataclass.""" - lines = ["\n"] # Short description - - # Add parameter documentation in ReST format - for name, type_ in fields: - desc = doc_params.get(name, "") - lines.append(f":param {name}: {desc}") - - return "\n".join(lines) - - -class ResponseBuilder: - content_builder: ContentBuilder - - def __init__(self, content_builder: ContentBuilder) -> None: - self.content_builder = content_builder - - def _get_status_responses( - self, options: ResponseOptions - ) -> Dict[str, StatusResponse]: - status_responses: Dict[str, StatusResponse] = {} - - for response_type in options.type_descriptions.keys(): - status_code = http_status_to_string( - options.status_catalog.get(response_type, options.default_status_code) - ) - - # look up response for status code - if status_code not in status_responses: - status_responses[status_code] = StatusResponse(status_code) - status_response = status_responses[status_code] - - # append response types that are assigned the given status code - status_response.types.append(response_type) - - # append examples that have the matching response type - if options.examples: - status_response.examples.extend( - example - for example in options.examples - if isinstance(example, response_type) - ) - - return dict(sorted(status_responses.items())) - - def build_response( - self, options: ResponseOptions - ) -> Dict[str, Union[Response, ResponseRef]]: - """ - Groups responses that have the same status code. 
- """ - - responses: Dict[str, Union[Response, ResponseRef]] = {} - status_responses = self._get_status_responses(options) - for status_code, status_response in status_responses.items(): - response_types = tuple(status_response.types) - if len(response_types) > 1: - composite_response_type: type = Union[response_types] # type: ignore - else: - (response_type,) = response_types - composite_response_type = response_type - - description = " **OR** ".join( - filter( - None, - ( - options.type_descriptions[response_type] - for response_type in response_types - ), - ) - ) - - responses[status_code] = self._build_response( - response_type=composite_response_type, - description=description, - examples=status_response.examples or None, - ) - - return responses - - def _build_response( - self, - response_type: type, - description: str, - examples: Optional[List[Any]] = None, - ) -> Response: - "Creates a response subtree." - - if response_type is not None: - return Response( - description=description, - content=self.content_builder.build_content(response_type, examples), - ) - else: - return Response(description=description) - - -def schema_error_wrapper(schema: SchemaOrRef) -> Schema: - "Wraps an error output schema into a top-level error schema." - - return { - "type": "object", - "properties": { - "error": schema, # type: ignore - }, - "additionalProperties": False, - "required": [ - "error", - ], - } - - -def sample_error_wrapper(error: JsonType) -> JsonType: - "Wraps an error output sample into a top-level error sample." - - return {"error": error} - - -class Generator: - endpoint: type - options: Options - schema_builder: SchemaBuilder - responses: Dict[str, Response] - - def __init__(self, endpoint: type, options: Options) -> None: - self.endpoint = endpoint - self.options = options - schema_generator = JsonSchemaGenerator( - SchemaOptions( - definitions_path="#/components/schemas/", - use_examples=self.options.use_examples, - property_description_fun=options.property_description_fun, - ) - ) - self.schema_builder = SchemaBuilder(schema_generator) - self.responses = {} - - # Create standard error responses - self._create_standard_error_responses() - - def _create_standard_error_responses(self) -> None: - """ - Creates standard error responses that can be reused across operations. - These will be added to the components.responses section of the OpenAPI document. - """ - # Get the Error schema - error_schema = self.schema_builder.classdef_to_ref(Error) - - # Create standard error responses - self.responses["BadRequest400"] = Response( - description="The request was invalid or malformed", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 400, - "title": "Bad Request", - "detail": "The request was invalid or malformed", - }, - ) - }, - ) - - self.responses["TooManyRequests429"] = Response( - description="The client has sent too many requests in a given amount of time", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 429, - "title": "Too Many Requests", - "detail": "You have exceeded the rate limit. Please try again later.", - }, - ) - }, - ) - - self.responses["InternalServerError500"] = Response( - description="The server encountered an unexpected error", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 500, - "title": "Internal Server Error", - "detail": "An unexpected error occurred. 
Our team has been notified.", - }, - ) - }, - ) - - # Add a default error response for any unhandled error cases - self.responses["DefaultError"] = Response( - description="An unexpected error occurred", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 0, - "title": "Error", - "detail": "An unexpected error occurred", - }, - ) - }, - ) - - def _build_type_tag(self, ref: str, schema: Schema) -> Tag: - # Don't include schema definition in the tag description because for one, - # it is not very valuable and for another, it causes string formatting - # discrepancies via the Stainless Studio. - # - # definition = f'' - title = typing.cast(str, schema.get("title")) - description = typing.cast(str, schema.get("description")) - return Tag( - name=ref, - description="\n\n".join(s for s in (title, description) if s is not None), - ) - - def _build_extra_tag_groups( - self, extra_types: Dict[str, Dict[str, type]] - ) -> Dict[str, List[Tag]]: - """ - Creates a dictionary of tag group captions as keys, and tag lists as values. - - :param extra_types: A dictionary of type categories and list of types in that category. - """ - - extra_tags: Dict[str, List[Tag]] = {} - - for category_name, category_items in extra_types.items(): - tag_list: List[Tag] = [] - - for name, extra_type in category_items.items(): - schema = self.schema_builder.classdef_to_schema(extra_type) - tag_list.append(self._build_type_tag(name, schema)) - - if tag_list: - extra_tags[category_name] = tag_list - - return extra_tags - - def _get_api_group_for_operation(self, op) -> str | None: - """ - Determine the API group for an operation based on its route path. - - Args: - op: The endpoint operation - - Returns: - The API group name derived from the route, or None if unable to determine - """ - if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'): - return None - - route = op.webmethod.route - if not route or not route.startswith('/'): - return None - - # Extract API group from route path - # Examples: /v1/agents/list -> agents-api - # /v1/responses -> responses-api - # /v1/models -> models-api - path_parts = route.strip('/').split('/') - - if len(path_parts) < 2: - return None - - # Skip version prefix (v1, v1alpha, v1beta, etc.) - if path_parts[0].startswith('v1'): - if len(path_parts) < 2: - return None - api_segment = path_parts[1] - else: - api_segment = path_parts[0] - - # Convert to supplementary file naming convention - # agents -> agents-api, responses -> responses-api, etc. - return f"{api_segment}-api" - - def _load_supplemental_content(self, api_group: str | None) -> str: - """ - Load supplemental content for an API group based on stability level. - - Follows this resolution order: - 1. docs/supplementary/{stability}/{api_group}.md - 2. docs/supplementary/shared/{api_group}.md (fallback) - 3. 
Empty string if no files found - - Args: - api_group: The API group name (e.g., "agents-responses-api"), or None if no mapping exists - - Returns: - The supplemental content as markdown string, or empty string if not found - """ - if not api_group: - return "" - - base_path = Path(__file__).parent.parent.parent / "supplementary" - - # Try stability-specific content first if stability filter is set - if self.options.stability_filter: - stability_path = base_path / self.options.stability_filter / f"{api_group}.md" - if stability_path.exists(): - try: - return stability_path.read_text(encoding="utf-8") - except Exception as e: - print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}") - - # Fall back to shared content - shared_path = base_path / "shared" / f"{api_group}.md" - if shared_path.exists(): - try: - return shared_path.read_text(encoding="utf-8") - except Exception as e: - print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}") - - # No supplemental content found - return "" - - def _build_operation(self, op: EndpointOperation) -> Operation: - if op.defining_class.__name__ in [ - "SyntheticDataGeneration", - "PostTraining", - ]: - op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" - print(op.defining_class.__name__) - - # TODO (xiyan): temporary fix for datasetio inner impl + datasets api - # if op.defining_class.__name__ in ["DatasetIO"]: - # op.defining_class.__name__ = "Datasets" - - doc_string = parse_type(op.func_ref) - doc_params = dict( - (param.name, param.description) for param in doc_string.params.values() - ) - - # parameters passed in URL component path - path_parameters = [ - Parameter( - name=param_name, - in_=ParameterLocation.Path, - description=doc_params.get(param_name), - required=True, - schema=self.schema_builder.classdef_to_ref(param_type), - ) - for param_name, param_type in op.path_params - ] - - # parameters passed in URL component query string - query_parameters = [] - for param_name, param_type in op.query_params: - if is_type_optional(param_type): - inner_type: type = unwrap_optional_type(param_type) - required = False - else: - inner_type = param_type - required = True - - query_parameter = Parameter( - name=param_name, - in_=ParameterLocation.Query, - description=doc_params.get(param_name), - required=required, - schema=self.schema_builder.classdef_to_ref(inner_type), - ) - query_parameters.append(query_parameter) - - # parameters passed anywhere - parameters = path_parameters + query_parameters - - # Build extra body parameters documentation - extra_body_parameters = [] - for param_name, param_type, description in op.extra_body_params: - if is_type_optional(param_type): - inner_type: type = unwrap_optional_type(param_type) - required = False - else: - inner_type = param_type - required = True - - # Use description from ExtraBodyField if available, otherwise from docstring - param_description = description or doc_params.get(param_name) - - extra_body_param = ExtraBodyParameter( - name=param_name, - schema=self.schema_builder.classdef_to_ref(inner_type), - description=param_description, - required=required, - ) - extra_body_parameters.append(extra_body_param) - - webmethod = getattr(op.func_ref, "__webmethod__", None) - raw_bytes_request_body = False - if webmethod: - raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False) - - # data passed in request body as raw bytes cannot have request parameters - if raw_bytes_request_body and 
op.request_params: - raise ValueError( - "Cannot have both raw bytes request body and request parameters" - ) - - # data passed in request body as raw bytes - if raw_bytes_request_body: - requestBody = RequestBody( - content={ - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary", - } - } - }, - required=True, - ) - # data passed in request body as multipart/form-data - elif op.multipart_params: - builder = ContentBuilder(self.schema_builder) - - # Create schema properties for multipart form fields - properties = {} - required_fields = [] - - for name, param_type in op.multipart_params: - if get_origin(param_type) is Annotated: - base_type = get_args(param_type)[0] - else: - base_type = param_type - - # Check if the type is optional - is_optional = is_type_optional(base_type) - if is_optional: - base_type = unwrap_optional_type(base_type) - - if base_type is UploadFile: - # File upload - properties[name] = {"type": "string", "format": "binary"} - else: - # All other types - generate schema reference - # This includes enums, BaseModels, and simple types - properties[name] = self.schema_builder.classdef_to_ref(base_type) - - if not is_optional: - required_fields.append(name) - - multipart_schema = { - "type": "object", - "properties": properties, - "required": required_fields, - } - - requestBody = RequestBody( - content={"multipart/form-data": {"schema": multipart_schema}}, - required=True, - ) - # data passed in payload as JSON and mapped to request parameters - elif op.request_params: - builder = ContentBuilder(self.schema_builder) - first = next(iter(op.request_params)) - request_name, request_type = first - - # Special case: if there's a single parameter with Body(embed=False) that's a BaseModel, - # unwrap it to show the flat structure in the OpenAPI spec - # Example: openai_chat_completion() - if (len(op.request_params) == 1 and is_unwrapped_body_param(request_type)): - pass - else: - op_name = "".join(word.capitalize() for word in op.name.split("_")) - request_name = f"{op_name}Request" - fields = [ - ( - name, - type_, - ) - for name, type_ in op.request_params - ] - request_type = make_dataclass( - request_name, - fields, - namespace={ - "__doc__": create_docstring_for_request( - request_name, fields, doc_params - ) - }, - ) - - requestBody = RequestBody( - content={ - "application/json": builder.build_media_type( - request_type, op.request_examples - ) - }, - description=doc_params.get(request_name), - required=True, - ) - else: - requestBody = None - - # success response types - if doc_string.returns is None and is_type_union(op.response_type): - # split union of return types into a list of response types - success_type_docstring: Dict[type, Docstring] = { - typing.cast(type, item): parse_type(item) - for item in unwrap_union_types(op.response_type) - } - success_type_descriptions = { - item: doc_string.short_description - for item, doc_string in success_type_docstring.items() - } - else: - # use return type as a single response type - success_type_descriptions = { - op.response_type: ( - doc_string.returns.description if doc_string.returns else "OK" - ) - } - - response_examples = op.response_examples or [] - success_examples = [ - example - for example in response_examples - if not isinstance(example, Exception) - ] - - content_builder = ContentBuilder(self.schema_builder) - response_builder = ResponseBuilder(content_builder) - response_options = ResponseOptions( - success_type_descriptions, - success_examples if self.options.use_examples else 
None, - self.options.success_responses, - "200", - ) - responses = response_builder.build_response(response_options) - - # failure response types - if doc_string.raises: - exception_types: Dict[type, str] = { - item.raise_type: item.description for item in doc_string.raises.values() - } - exception_examples = [ - example - for example in response_examples - if isinstance(example, Exception) - ] - - if self.options.error_wrapper: - schema_transformer = schema_error_wrapper - sample_transformer = sample_error_wrapper - else: - schema_transformer = None - sample_transformer = None - - content_builder = ContentBuilder( - self.schema_builder, - schema_transformer=schema_transformer, - sample_transformer=sample_transformer, - ) - response_builder = ResponseBuilder(content_builder) - response_options = ResponseOptions( - exception_types, - exception_examples if self.options.use_examples else None, - self.options.error_responses, - "500", - ) - responses.update(response_builder.build_response(response_options)) - - assert len(responses.keys()) > 0, f"No responses found for {op.name}" - - # Add standard error response references - if self.options.include_standard_error_responses: - if "400" not in responses: - responses["400"] = ResponseRef("BadRequest400") - if "429" not in responses: - responses["429"] = ResponseRef("TooManyRequests429") - if "500" not in responses: - responses["500"] = ResponseRef("InternalServerError500") - if "default" not in responses: - responses["default"] = ResponseRef("DefaultError") - - if op.event_type is not None: - builder = ContentBuilder(self.schema_builder) - callbacks = { - f"{op.func_name}_callback": { - "{$request.query.callback}": PathItem( - post=Operation( - requestBody=RequestBody( - content=builder.build_content(op.event_type) - ), - responses={"200": Response(description="OK")}, - ) - ) - } - } - - else: - callbacks = None - - # Build base description from docstring - base_description = "\n".join( - filter(None, [doc_string.short_description, doc_string.long_description]) - ) - - # Individual endpoints get clean descriptions only - description = base_description - - return Operation( - tags=[ - getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__) - ], - summary=doc_string.short_description, - description=description, - parameters=parameters, - requestBody=requestBody, - responses=responses, - callbacks=callbacks, - deprecated=getattr(op.webmethod, "deprecated", False) - or "DEPRECATED" in op.func_name, - security=[] if op.public else None, - extraBodyParameters=extra_body_parameters if extra_body_parameters else None, - ) - - def _get_api_stability_priority(self, api_level: str) -> int: - """ - Return sorting priority for API stability levels. 
- Lower numbers = higher priority (appear first) - - :param api_level: The API level (e.g., "v1", "v1beta", "v1alpha") - :return: Priority number for sorting - """ - stability_order = { - "v1": 0, # Stable - highest priority - "v1beta": 1, # Beta - medium priority - "v1alpha": 2, # Alpha - lowest priority - } - return stability_order.get(api_level, 999) # Unknown levels go last - - def generate(self) -> Document: - paths: Dict[str, PathItem] = {} - endpoint_classes: Set[type] = set() - - # Collect all operations and filter by stability if specified - operations = list( - get_endpoint_operations( - self.endpoint, use_examples=self.options.use_examples - ) - ) - - # Filter operations by stability level if requested - if self.options.stability_filter: - filtered_operations = [] - for op in operations: - deprecated = ( - getattr(op.webmethod, "deprecated", False) - or "DEPRECATED" in op.func_name - ) - stability_level = op.webmethod.level - - if self.options.stability_filter == "stable": - # Include v1 non-deprecated endpoints - if stability_level == "v1" and not deprecated: - filtered_operations.append(op) - elif self.options.stability_filter == "experimental": - # Include v1alpha and v1beta endpoints (deprecated or not) - if stability_level in ["v1alpha", "v1beta"]: - filtered_operations.append(op) - elif self.options.stability_filter == "deprecated": - # Include only deprecated endpoints - if deprecated: - filtered_operations.append(op) - elif self.options.stability_filter == "stainless": - # Include stable (v1), deprecated (v1 deprecated), and experimental (v1alpha, v1beta) endpoints - if stability_level == "v1" or stability_level in ["v1alpha", "v1beta"]: - filtered_operations.append(op) - - operations = filtered_operations - print( - f"Filtered to {len(operations)} operations for stability level: {self.options.stability_filter}" - ) - - # Sort operations by multiple criteria for consistent ordering: - # 1. Stability level with deprecation handling (global priority): - # - Active stable (v1) comes first - # - Beta (v1beta) comes next - # - Alpha (v1alpha) comes next - # - Deprecated stable (v1 deprecated) comes last - # 2. Route path (group related endpoints within same stability level) - # 3. HTTP method (GET, POST, PUT, DELETE, PATCH) - # 4. 
Operation name (alphabetical) - def sort_key(op): - http_method_order = { - HTTPMethod.GET: 0, - HTTPMethod.POST: 1, - HTTPMethod.PUT: 2, - HTTPMethod.DELETE: 3, - HTTPMethod.PATCH: 4, - } - - # Enhanced stability priority for migration pattern support - deprecated = getattr(op.webmethod, "deprecated", False) - stability_priority = self._get_api_stability_priority(op.webmethod.level) - - # Deprecated versions should appear after everything else - # This ensures deprecated stable endpoints come last globally - if deprecated: - stability_priority += 10 # Push deprecated endpoints to the end - - return ( - stability_priority, # Global stability handling comes first - op.get_route( - op.webmethod - ), # Group by route path within stability level - http_method_order.get(op.http_method, 999), - op.func_name, - ) - - operations.sort(key=sort_key) - - # Debug output for migration pattern tracking - migration_routes = {} - for op in operations: - route_key = (op.get_route(op.webmethod), op.http_method) - if route_key not in migration_routes: - migration_routes[route_key] = [] - migration_routes[route_key].append( - (op.webmethod.level, getattr(op.webmethod, "deprecated", False)) - ) - - for route_key, versions in migration_routes.items(): - if len(versions) > 1: - print(f"Migration pattern detected for {route_key[1]} {route_key[0]}:") - for level, deprecated in versions: - status = "DEPRECATED" if deprecated else "ACTIVE" - print(f" - {level} ({status})") - - for op in operations: - endpoint_classes.add(op.defining_class) - - operation = self._build_operation(op) - - if op.http_method is HTTPMethod.GET: - pathItem = PathItem(get=operation) - elif op.http_method is HTTPMethod.PUT: - pathItem = PathItem(put=operation) - elif op.http_method is HTTPMethod.POST: - pathItem = PathItem(post=operation) - elif op.http_method is HTTPMethod.DELETE: - pathItem = PathItem(delete=operation) - elif op.http_method is HTTPMethod.PATCH: - pathItem = PathItem(patch=operation) - else: - raise NotImplementedError(f"unknown HTTP method: {op.http_method}") - - route = op.get_route(op.webmethod) - route = route.replace(":path", "") - print(f"route: {route}") - if route in paths: - paths[route].update(pathItem) - else: - paths[route] = pathItem - - operation_tags: List[Tag] = [] - for cls in endpoint_classes: - doc_string = parse_type(cls) - if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__: - continue - - # Add supplemental content to tag pages - api_group = f"{cls.__name__.lower()}-api" - supplemental_content = self._load_supplemental_content(api_group) - - tag_description = doc_string.long_description or "" - if supplemental_content: - if tag_description: - tag_description = f"{tag_description}\n\n{supplemental_content}" - else: - tag_description = supplemental_content - - operation_tags.append( - Tag( - name=cls.__name__, - description=tag_description, - displayName=doc_string.short_description, - ) - ) - - # types that are emitted by events - event_tags: List[Tag] = [] - events = get_endpoint_events(self.endpoint) - for ref, event_type in events.items(): - event_schema = self.schema_builder.classdef_to_named_schema(ref, event_type) - event_tags.append(self._build_type_tag(ref, event_schema)) - - # types that are explicitly declared - extra_tag_groups: Dict[str, List[Tag]] = {} - if self.options.extra_types is not None: - if isinstance(self.options.extra_types, list): - extra_tag_groups = self._build_extra_tag_groups( - {"AdditionalTypes": self.options.extra_types} - ) - elif 
isinstance(self.options.extra_types, dict): - extra_tag_groups = self._build_extra_tag_groups( - self.options.extra_types - ) - else: - raise TypeError( - f"type mismatch for collection of extra types: {type(self.options.extra_types)}" - ) - - # list all operations and types - tags: List[Tag] = [] - tags.extend(operation_tags) - tags.extend(event_tags) - for extra_tag_group in extra_tag_groups.values(): - tags.extend(extra_tag_group) - - tags = sorted(tags, key=lambda t: t.name) - - tag_groups = [] - if operation_tags: - tag_groups.append( - TagGroup( - name=self.options.map("Operations"), - tags=sorted(tag.name for tag in operation_tags), - ) - ) - if event_tags: - tag_groups.append( - TagGroup( - name=self.options.map("Events"), - tags=sorted(tag.name for tag in event_tags), - ) - ) - for caption, extra_tag_group in extra_tag_groups.items(): - tag_groups.append( - TagGroup( - name=caption, - tags=sorted(tag.name for tag in extra_tag_group), - ) - ) - - if self.options.default_security_scheme: - securitySchemes = {"Default": self.options.default_security_scheme} - else: - securitySchemes = None - - return Document( - openapi=".".join(str(item) for item in self.options.version), - info=self.options.info, - jsonSchemaDialect=( - "https://json-schema.org/draft/2020-12/schema" - if self.options.version >= (3, 1, 0) - else None - ), - servers=[self.options.server], - paths=paths, - components=Components( - schemas=self.schema_builder.schemas, - responses=self.responses, - securitySchemes=securitySchemes, - ), - security=[{"Default": []}], - tags=tags, - tagGroups=tag_groups, - ) diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py deleted file mode 100644 index 42a554f2c..000000000 --- a/docs/openapi_generator/pyopenapi/operations.py +++ /dev/null @@ -1,459 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import collections.abc -import enum -import inspect -import typing -from dataclasses import dataclass -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union - -from termcolor import colored - -from typing import get_origin, get_args - -from fastapi import UploadFile -from fastapi.params import File, Form -from typing import Annotated - -from llama_stack_api import ( - ExtraBodyField, - LLAMA_STACK_API_V1, - LLAMA_STACK_API_V1ALPHA, - LLAMA_STACK_API_V1BETA, - get_signature, -) - - -def split_prefix( - s: str, sep: str, prefix: Union[str, Iterable[str]] -) -> Tuple[Optional[str], str]: - """ - Recognizes a prefix at the beginning of a string. - - :param s: The string to check. - :param sep: A separator between (one of) the prefix(es) and the rest of the string. - :param prefix: A string or a set of strings to identify as a prefix. - :return: A tuple of the recognized prefix (if any) and the rest of the string excluding the separator (or the entire string). - """ - - if isinstance(prefix, str): - if s.startswith(prefix + sep): - return prefix, s[len(prefix) + len(sep) :] - else: - return None, s - - for p in prefix: - if s.startswith(p + sep): - return p, s[len(p) + len(sep) :] - - return None, s - - -def _get_annotation_type(annotation: Union[type, str], callable: Callable) -> type: - "Maps a stringized reference to a type, as if using `from __future__ import annotations`." 
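`split_prefix` above peels a recognized verb prefix off a member-function name and, per its docstring, returns the prefix (or `None`) plus the remainder. Both cases, assuming the definition above:

```python
# Assumes split_prefix as defined above.
print(split_prefix("get_model", "_", ["get", "delete"]))
# ('get', 'model')
print(split_prefix("register_model", "_", ["get", "delete"]))
# (None, 'register_model')
```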
-
-    if isinstance(annotation, str):
-        return eval(annotation, callable.__globals__)
-    else:
-        return annotation
-
-
-class HTTPMethod(enum.Enum):
-    "HTTP method used to invoke an endpoint operation."
-
-    GET = "GET"
-    POST = "POST"
-    PUT = "PUT"
-    DELETE = "DELETE"
-    PATCH = "PATCH"
-
-
-OperationParameter = Tuple[str, type]
-
-
-class ValidationError(TypeError):
-    pass
-
-
-@dataclass
-class EndpointOperation:
-    """
-    Type information and metadata associated with an endpoint operation.
-
-    :param defining_class: The most specific class that defines the endpoint operation.
-    :param name: The short name of the endpoint operation.
-    :param func_name: The name of the function to invoke when the operation is triggered.
-    :param func_ref: The callable to invoke when the operation is triggered.
-    :param route: A custom route string assigned to the operation.
-    :param path_params: Parameters of the operation signature that are passed in the path component of the URL string.
-    :param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
-    :param request_params: The parameter that corresponds to the data transmitted in the request body.
-    :param multipart_params: Parameters that indicate multipart/form-data request body.
-    :param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK.
-    :param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
-    :param response_type: The Python type of the data that is transmitted in the response body.
-    :param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.
-    :param public: True if the operation can be invoked without prior authentication.
-    :param request_examples: Sample requests that the operation might take.
-    :param response_examples: Sample responses that the operation might produce.
-    """
-
-    defining_class: type
-    name: str
-    func_name: str
-    func_ref: Callable[..., Any]
-    route: Optional[str]
-    path_params: List[OperationParameter]
-    query_params: List[OperationParameter]
-    request_params: Optional[OperationParameter]
-    multipart_params: List[OperationParameter]
-    extra_body_params: List[tuple[str, type, str | None]]
-    event_type: Optional[type]
-    response_type: type
-    http_method: HTTPMethod
-    public: bool
-    request_examples: Optional[List[Any]] = None
-    response_examples: Optional[List[Any]] = None
-
-    def get_route(self, webmethod) -> str:
-        api_level = webmethod.level
-
-        if self.route is not None:
-            return "/".join(["", api_level, self.route.lstrip("/")])
-
-        route_parts = ["", api_level, self.name]
-        for param_name, _ in self.path_params:
-            route_parts.append("{" + param_name + "}")
-        return "/".join(route_parts)
-
-
-class _FormatParameterExtractor:
-    "A visitor to extract parameters in a format string."
- - keys: List[str] - - def __init__(self) -> None: - self.keys = [] - - def __getitem__(self, key: str) -> None: - self.keys.append(key) - return None - - -def _get_route_parameters(route: str) -> List[str]: - extractor = _FormatParameterExtractor() - # Replace all occurrences of ":path" with empty string - route = route.replace(":path", "") - route.format_map(extractor) - return extractor.keys - - -def _get_endpoint_functions( - endpoint: type, prefixes: List[str] -) -> Iterator[Tuple[str, str, str, Callable]]: - if not inspect.isclass(endpoint): - raise ValueError(f"object is not a class type: {endpoint}") - - functions = inspect.getmembers(endpoint, inspect.isfunction) - for func_name, func_ref in functions: - webmethods = [] - - # Check for multiple webmethods (stacked decorators) - if hasattr(func_ref, "__webmethods__"): - webmethods = func_ref.__webmethods__ - - if not webmethods: - continue - - for webmethod in webmethods: - print(f"Processing {colored(func_name, 'white')}...") - operation_name = func_name - - if webmethod.method == "GET": - prefix = "get" - elif webmethod.method == "DELETE": - prefix = "delete" - elif webmethod.method == "POST": - prefix = "post" - elif operation_name.startswith("get_") or operation_name.endswith("/get"): - prefix = "get" - elif ( - operation_name.startswith("delete_") - or operation_name.startswith("remove_") - or operation_name.endswith("/delete") - or operation_name.endswith("/remove") - ): - prefix = "delete" - else: - # by default everything else is a POST - prefix = "post" - - yield prefix, operation_name, func_name, func_ref - - -def _get_defining_class(member_fn: str, derived_cls: type) -> type: - "Find the class in which a member function is first defined in a class inheritance hierarchy." - - # iterate in reverse member resolution order to find most specific class first - for cls in reversed(inspect.getmro(derived_cls)): - for name, _ in inspect.getmembers(cls, inspect.isfunction): - if name == member_fn: - return cls - - raise ValidationError( - f"cannot find defining class for {member_fn} in {derived_cls}" - ) - - -def get_endpoint_operations( - endpoint: type, use_examples: bool = True -) -> List[EndpointOperation]: - """ - Extracts a list of member functions in a class eligible for HTTP interface binding. - - These member functions are expected to have a signature like - ``` - async def get_object(self, uuid: str, version: int) -> Object: - ... - ``` - where the prefix `get_` translates to an HTTP GET, `object` corresponds to the name of the endpoint operation, - `uuid` and `version` are mapped to route path elements in "/object/{uuid}/{version}", and `Object` becomes - the response payload type, transmitted as an object serialized to JSON. - - If the member function has a composite class type in the argument list, it becomes the request payload type, - and the caller is expected to provide the data as serialized JSON in an HTTP POST request. - - :param endpoint: A class with member functions that can be mapped to an HTTP endpoint. - :param use_examples: Whether to return examples associated with member functions. 
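`_FormatParameterExtractor` above leans on the fact that `str.format_map` resolves each `{placeholder}` through `__getitem__`, so simply recording the requested keys recovers a route's path parameters without any regex work. The same trick, self-contained:

```python
class Recorder:
    def __init__(self) -> None:
        self.keys: list[str] = []

    def __getitem__(self, key: str) -> None:
        # format_map calls this once per "{placeholder}"; record the key
        # and return None, which formats harmlessly into the throwaway result.
        self.keys.append(key)
        return None

recorder = Recorder()
"/models/{model_id}/versions/{version}".format_map(recorder)
print(recorder.keys)  # ['model_id', 'version']
```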
- """ - - result = [] - - for prefix, operation_name, func_name, func_ref in _get_endpoint_functions( - endpoint, - [ - "create", - "delete", - "do", - "get", - "post", - "put", - "remove", - "set", - "update", - ], - ): - # Get all webmethods for this function - webmethods = getattr(func_ref, "__webmethods__", []) - - # Create one EndpointOperation for each webmethod - for webmethod in webmethods: - route = webmethod.route - route_params = _get_route_parameters(route) if route is not None else None - public = webmethod.public - request_examples = webmethod.request_examples - response_examples = webmethod.response_examples - - # inspect function signature for path and query parameters, and request/response payload type - signature = get_signature(func_ref) - - path_params = [] - query_params = [] - request_params = [] - multipart_params = [] - extra_body_params = [] - - for param_name, parameter in signature.parameters.items(): - param_type = _get_annotation_type(parameter.annotation, func_ref) - - # omit "self" for instance methods - if param_name == "self" and param_type is inspect.Parameter.empty: - continue - - # check if all parameters have explicit type - if parameter.annotation is inspect.Parameter.empty: - raise ValidationError( - f"parameter '{param_name}' in function '{func_name}' has no type annotation" - ) - - # Check if this is an extra_body parameter - is_extra_body, extra_body_desc = _is_extra_body_param(param_type) - if is_extra_body: - # Store in a separate list for documentation - extra_body_params.append((param_name, param_type, extra_body_desc)) - continue # Skip adding to request_params - - is_multipart = _is_multipart_param(param_type) - - if prefix in ["get", "delete"]: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - else: - query_params.append((param_name, param_type)) - else: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - elif is_multipart: - multipart_params.append((param_name, param_type)) - else: - request_params.append((param_name, param_type)) - - # check if function has explicit return type - if signature.return_annotation is inspect.Signature.empty: - raise ValidationError( - f"function '{func_name}' has no return type annotation" - ) - - return_type = _get_annotation_type(signature.return_annotation, func_ref) - - # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType] - # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request - if typing.get_origin(return_type) is collections.abc.Generator: - event_type, send_type, response_type = typing.get_args(return_type) - if send_type is not type(None): - raise ValidationError( - f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type" - ) - else: - event_type = None - - def process_type(t): - if typing.get_origin(t) is collections.abc.AsyncIterator: - # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List - # or the item type. 
I am choosing it to be the latter - args = typing.get_args(t) - return args[0] - elif typing.get_origin(t) is typing.Union: - types = [process_type(a) for a in typing.get_args(t)] - return typing._UnionGenericAlias(typing.Union, tuple(types)) - else: - return t - - response_type = process_type(return_type) - - if prefix in ["delete", "remove"]: - http_method = HTTPMethod.DELETE - elif prefix == "post": - http_method = HTTPMethod.POST - elif prefix == "get": - http_method = HTTPMethod.GET - elif prefix == "set": - http_method = HTTPMethod.PUT - elif prefix == "update": - http_method = HTTPMethod.PATCH - else: - raise ValidationError(f"unknown prefix {prefix}") - - # Create an EndpointOperation for this specific webmethod - operation = EndpointOperation( - defining_class=_get_defining_class(func_name, endpoint), - name=operation_name, - func_name=func_name, - func_ref=func_ref, - route=route, - path_params=path_params, - query_params=query_params, - request_params=request_params, - multipart_params=multipart_params, - extra_body_params=extra_body_params, - event_type=event_type, - response_type=response_type, - http_method=http_method, - public=public, - request_examples=request_examples if use_examples else None, - response_examples=response_examples if use_examples else None, - ) - - # Store the specific webmethod with this operation - operation.webmethod = webmethod - result.append(operation) - - if not result: - raise ValidationError(f"no eligible endpoint operations in type {endpoint}") - - return result - - -def get_endpoint_events(endpoint: type) -> Dict[str, type]: - results = {} - - for decl in typing.get_type_hints(endpoint).values(): - # check if signature is Callable[...] - origin = typing.get_origin(decl) - if origin is None or not issubclass(origin, Callable): # type: ignore - continue - - # check if signature is Callable[[...], Any] - args = typing.get_args(decl) - if len(args) != 2: - continue - params_type, return_type = args - if not isinstance(params_type, list): - continue - - # check if signature is Callable[[...], None] - if not issubclass(return_type, type(None)): - continue - - # check if signature is Callable[[EventType], None] - if len(params_type) != 1: - continue - - param_type = params_type[0] - results[param_type.__name__] = param_type - - return results - - -def _is_multipart_param(param_type: type) -> bool: - """ - Check if a parameter type indicates multipart form data. - - Returns True if the type is: - - UploadFile - - Annotated[UploadFile, File()] - - Annotated[str, Form()] - - Annotated[Any, File()] - - Annotated[Any, Form()] - """ - if param_type is UploadFile: - return True - - # Check for Annotated types - origin = get_origin(param_type) - if origin is None: - return False - - if origin is Annotated: - args = get_args(param_type) - if len(args) < 2: - return False - - # Check the annotations for File() or Form() - for annotation in args[1:]: - if isinstance(annotation, (File, Form)): - return True - return False - - -def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]: - """ - Check if parameter is marked as coming from extra_body. 
- - Returns: - (is_extra_body, description): Tuple of boolean and optional description - """ - origin = get_origin(param_type) - if origin is Annotated: - args = get_args(param_type) - for annotation in args[1:]: - if isinstance(annotation, ExtraBodyField): - return True, annotation.description - # Also check by type name for cases where import matters - if type(annotation).__name__ == 'ExtraBodyField': - return True, getattr(annotation, 'description', None) - return False, None diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py deleted file mode 100644 index 53855b5b6..000000000 --- a/docs/openapi_generator/pyopenapi/options.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import dataclasses -from dataclasses import dataclass -from http import HTTPStatus -from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Union - -from .specification import ( - Info, - SecurityScheme, - SecuritySchemeAPI, - SecuritySchemeHTTP, - SecuritySchemeOpenIDConnect, - Server, -) - -HTTPStatusCode = Union[HTTPStatus, int, str] - - -@dataclass -class Options: - """ - :param server: Base URL for the API endpoint. - :param info: Meta-information for the endpoint specification. - :param version: OpenAPI specification version as a tuple of major, minor, revision. - :param default_security_scheme: Security scheme to apply to endpoints, unless overridden on a per-endpoint basis. - :param extra_types: Extra types in addition to those found in operation signatures. Use a dictionary to group related types. - :param use_examples: Whether to emit examples for operations. - :param success_responses: Associates operation response types with HTTP status codes. - :param error_responses: Associates error response types with HTTP status codes. - :param error_wrapper: True if errors are encapsulated in an error object wrapper. - :param property_description_fun: Custom transformation function to apply to class property documentation strings. - :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types. - :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, 503) in all operations. - """ - - server: Server - info: Info - version: Tuple[int, int, int] = (3, 1, 0) - default_security_scheme: Optional[SecurityScheme] = None - extra_types: Union[List[type], Dict[str, List[type]], None] = None - use_examples: bool = True - success_responses: Dict[type, HTTPStatusCode] = dataclasses.field( - default_factory=dict - ) - error_responses: Dict[type, HTTPStatusCode] = dataclasses.field( - default_factory=dict - ) - error_wrapper: bool = False - property_description_fun: Optional[Callable[[type, str, str], str]] = None - captions: Optional[Dict[str, str]] = None - include_standard_error_responses: bool = True - stability_filter: Optional[str] = None - - default_captions: ClassVar[Dict[str, str]] = { - "Operations": "Operations", - "Types": "Types", - "Events": "Events", - "AdditionalTypes": "Additional types", - } - - def map(self, id: str) -> str: - "Maps a language-neutral placeholder string to language-dependent text." 
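`_is_extra_body_param` above scans `Annotated` metadata for an `ExtraBodyField` marker, falling back to a type-name check so the detection survives duplicate imports. The same metadata-scanning pattern in isolation, with a stand-in marker class (hypothetical; the real `ExtraBodyField` lives in `llama_stack_api`):

```python
from typing import Annotated, get_args, get_origin

class Marker:  # stand-in for ExtraBodyField
    def __init__(self, description: str | None = None) -> None:
        self.description = description

def extra_body_info(param_type: type) -> tuple[bool, str | None]:
    # Walk the Annotated metadata (everything after the base type)
    # and report the first marker found.
    if get_origin(param_type) is Annotated:
        for annotation in get_args(param_type)[1:]:
            if isinstance(annotation, Marker):
                return True, annotation.description
    return False, None

print(extra_body_info(Annotated[str, Marker("guided decoding config")]))
# (True, 'guided decoding config')
print(extra_body_info(str))
# (False, None)
```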
- - if self.captions is not None: - caption = self.captions.get(id) - if caption is not None: - return caption - - caption = self.__class__.default_captions.get(id) - if caption is not None: - return caption - - raise KeyError(f"no caption found for ID: {id}") diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py deleted file mode 100644 index bfa35f539..000000000 --- a/docs/openapi_generator/pyopenapi/specification.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import dataclasses -import enum -from dataclasses import dataclass -from typing import Any, ClassVar, Dict, List, Optional, Union - -from llama_stack_api import JsonType, Schema, StrictJsonType - -URL = str - - -@dataclass -class Ref: - ref_type: ClassVar[str] - id: str - - def to_json(self) -> StrictJsonType: - return {"$ref": f"#/components/{self.ref_type}/{self.id}"} - - -@dataclass -class SchemaRef(Ref): - ref_type: ClassVar[str] = "schemas" - - -SchemaOrRef = Union[Schema, SchemaRef] - - -@dataclass -class ResponseRef(Ref): - ref_type: ClassVar[str] = "responses" - - -@dataclass -class ParameterRef(Ref): - ref_type: ClassVar[str] = "parameters" - - -@dataclass -class ExampleRef(Ref): - ref_type: ClassVar[str] = "examples" - - -@dataclass -class Contact: - name: Optional[str] = None - url: Optional[URL] = None - email: Optional[str] = None - - -@dataclass -class License: - name: str - url: Optional[URL] = None - - -@dataclass -class Info: - title: str - version: str - description: Optional[str] = None - termsOfService: Optional[str] = None - contact: Optional[Contact] = None - license: Optional[License] = None - - -@dataclass -class MediaType: - schema: Optional[SchemaOrRef] = None - example: Optional[Any] = None - examples: Optional[Dict[str, Union["Example", ExampleRef]]] = None - - -@dataclass -class RequestBody: - content: Dict[str, MediaType | Dict[str, Any]] - description: Optional[str] = None - required: Optional[bool] = None - - -@dataclass -class Response: - description: str - content: Optional[Dict[str, MediaType]] = None - - -class ParameterLocation(enum.Enum): - Query = "query" - Header = "header" - Path = "path" - Cookie = "cookie" - - -@dataclass -class Parameter: - name: str - in_: ParameterLocation - description: Optional[str] = None - required: Optional[bool] = None - schema: Optional[SchemaOrRef] = None - example: Optional[Any] = None - - -@dataclass -class ExtraBodyParameter: - """Represents a parameter that arrives via extra_body in the request.""" - name: str - schema: SchemaOrRef - description: Optional[str] = None - required: Optional[bool] = None - - -@dataclass -class Operation: - responses: Dict[str, Union[Response, ResponseRef]] - tags: Optional[List[str]] = None - summary: Optional[str] = None - description: Optional[str] = None - operationId: Optional[str] = None - parameters: Optional[List[Parameter]] = None - requestBody: Optional[RequestBody] = None - callbacks: Optional[Dict[str, "Callback"]] = None - security: Optional[List["SecurityRequirement"]] = None - deprecated: Optional[bool] = None - extraBodyParameters: Optional[List[ExtraBodyParameter]] = None - - -@dataclass -class PathItem: - summary: Optional[str] = None - description: Optional[str] = None - get: Optional[Operation] = None - put: Optional[Operation] = None - post: 
Optional[Operation] = None - delete: Optional[Operation] = None - options: Optional[Operation] = None - head: Optional[Operation] = None - patch: Optional[Operation] = None - trace: Optional[Operation] = None - - def update(self, other: "PathItem") -> None: - "Merges another instance of this class into this object." - - for field in dataclasses.fields(self.__class__): - value = getattr(other, field.name) - if value is not None: - setattr(self, field.name, value) - - -# maps run-time expressions such as "$request.body#/url" to path items -Callback = Dict[str, PathItem] - - -@dataclass -class Example: - summary: Optional[str] = None - description: Optional[str] = None - value: Optional[Any] = None - externalValue: Optional[URL] = None - - -@dataclass -class Server: - url: URL - description: Optional[str] = None - - -class SecuritySchemeType(enum.Enum): - ApiKey = "apiKey" - HTTP = "http" - OAuth2 = "oauth2" - OpenIDConnect = "openIdConnect" - - -@dataclass -class SecurityScheme: - type: SecuritySchemeType - description: str - - -@dataclass(init=False) -class SecuritySchemeAPI(SecurityScheme): - name: str - in_: ParameterLocation - - def __init__(self, description: str, name: str, in_: ParameterLocation) -> None: - super().__init__(SecuritySchemeType.ApiKey, description) - self.name = name - self.in_ = in_ - - -@dataclass(init=False) -class SecuritySchemeHTTP(SecurityScheme): - scheme: str - bearerFormat: Optional[str] = None - - def __init__( - self, description: str, scheme: str, bearerFormat: Optional[str] = None - ) -> None: - super().__init__(SecuritySchemeType.HTTP, description) - self.scheme = scheme - self.bearerFormat = bearerFormat - - -@dataclass(init=False) -class SecuritySchemeOpenIDConnect(SecurityScheme): - openIdConnectUrl: str - - def __init__(self, description: str, openIdConnectUrl: str) -> None: - super().__init__(SecuritySchemeType.OpenIDConnect, description) - self.openIdConnectUrl = openIdConnectUrl - - -@dataclass -class Components: - schemas: Optional[Dict[str, Schema]] = None - responses: Optional[Dict[str, Response]] = None - parameters: Optional[Dict[str, Parameter]] = None - examples: Optional[Dict[str, Example]] = None - requestBodies: Optional[Dict[str, RequestBody]] = None - securitySchemes: Optional[Dict[str, SecurityScheme]] = None - callbacks: Optional[Dict[str, Callback]] = None - - -SecurityScope = str -SecurityRequirement = Dict[str, List[SecurityScope]] - - -@dataclass -class Tag: - name: str - description: Optional[str] = None - displayName: Optional[str] = None - - -@dataclass -class TagGroup: - """ - A ReDoc extension to provide information about groups of tags. - - Exposed via the vendor-specific property "x-tagGroups" of the top-level object. - """ - - name: str - tags: List[str] - - -@dataclass -class Document: - """ - This class is a Python dataclass adaptation of the OpenAPI Specification. 
-
-    For details, see 
-    """
-
-    openapi: str
-    info: Info
-    servers: List[Server]
-    paths: Dict[str, PathItem]
-    jsonSchemaDialect: Optional[str] = None
-    components: Optional[Components] = None
-    security: Optional[List[SecurityRequirement]] = None
-    tags: Optional[List[Tag]] = None
-    tagGroups: Optional[List[TagGroup]] = None
diff --git a/docs/openapi_generator/pyopenapi/template.html b/docs/openapi_generator/pyopenapi/template.html
deleted file mode 100644
index 5848f364e..000000000
--- a/docs/openapi_generator/pyopenapi/template.html
+++ /dev/null
@@ -1,41 +0,0 @@
-    OpenAPI specification
diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py
deleted file mode 100644
index 762249eb8..000000000
--- a/docs/openapi_generator/pyopenapi/utility.py
+++ /dev/null
@@ -1,287 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-import typing
-import inspect
-from pathlib import Path
-from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args
-
-from pydantic import BaseModel
-from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json
-from llama_stack.core.resolver import api_protocol_map
-
-from .generator import Generator
-from .options import Options
-from .specification import Document
-
-THIS_DIR = Path(__file__).parent
-
-
-class Specification:
-    document: Document
-
-    def __init__(self, endpoint: type, options: Options):
-        generator = Generator(endpoint, options)
-        self.document = generator.generate()
-
-    def get_json(self) -> StrictJsonType:
-        """
-        Returns the OpenAPI specification as a Python data type (e.g. `dict` for an object, `list` for an array).
-
-        The result can be serialized to a JSON string with `json.dump` or `json.dumps`.
-        """
-
-        json_doc = typing.cast(StrictJsonType, object_to_json(self.document))
-
-        if isinstance(json_doc, dict):
-            # rename vendor-specific properties
-            tag_groups = json_doc.pop("tagGroups", None)
-            if tag_groups:
-                json_doc["x-tagGroups"] = tag_groups
-            tags = json_doc.get("tags")
-            if tags and isinstance(tags, list):
-                for tag in tags:
-                    if not isinstance(tag, dict):
-                        continue
-
-                    display_name = tag.pop("displayName", None)
-                    if display_name:
-                        tag["x-displayName"] = display_name
-
-            # Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
-            paths = json_doc.get("paths", {})
-            for path_item in paths.values():
-                if isinstance(path_item, dict):
-                    for method in ["get", "post", "put", "delete", "patch"]:
-                        operation = path_item.get(method)
-                        if operation and isinstance(operation, dict):
-                            extra_body_params = operation.pop("extraBodyParameters", None)
-                            if extra_body_params:
-                                operation["x-llama-stack-extra-body-params"] = extra_body_params
-
-        return json_doc
-
-    def get_json_string(self, pretty_print: bool = False) -> str:
-        """
-        Returns the OpenAPI specification as a JSON string.
-
-        :param pretty_print: Whether to use line indents to beautify the output.
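The vendor-property renaming in `get_json` above is plain dictionary surgery: dataclass field names like `tagGroups` and `displayName` are re-keyed to the `x-` extensions that ReDoc and Stainless expect. A trimmed sketch of the same pass over a hand-built document dict:

```python
json_doc = {
    "tags": [{"name": "Models", "displayName": "Models API"}],
    "tagGroups": [{"name": "Operations", "tags": ["Models"]}],
}

# Re-key vendor-specific properties to their "x-" extension names.
tag_groups = json_doc.pop("tagGroups", None)
if tag_groups:
    json_doc["x-tagGroups"] = tag_groups
for tag in json_doc.get("tags", []):
    display_name = tag.pop("displayName", None)
    if display_name:
        tag["x-displayName"] = display_name

print(json_doc)
# {'tags': [{'name': 'Models', 'x-displayName': 'Models API'}],
#  'x-tagGroups': [{'name': 'Operations', 'tags': ['Models']}]}
```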
- """ - - json_doc = self.get_json() - if pretty_print: - return json.dumps( - json_doc, check_circular=False, ensure_ascii=False, indent=4 - ) - else: - return json.dumps( - json_doc, - check_circular=False, - ensure_ascii=False, - separators=(",", ":"), - ) - - def write_json(self, f: TextIO, pretty_print: bool = False) -> None: - """ - Writes the OpenAPI specification to a file as a JSON string. - - :param pretty_print: Whether to use line indents to beautify the output. - """ - - json_doc = self.get_json() - if pretty_print: - json.dump( - json_doc, - f, - check_circular=False, - ensure_ascii=False, - indent=4, - ) - else: - json.dump( - json_doc, - f, - check_circular=False, - ensure_ascii=False, - separators=(",", ":"), - ) - - def write_html(self, f: TextIO, pretty_print: bool = False) -> None: - """ - Creates a stand-alone HTML page for the OpenAPI specification with ReDoc. - - :param pretty_print: Whether to use line indents to beautify the JSON string in the HTML file. - """ - - path = THIS_DIR / "template.html" - with path.open(encoding="utf-8", errors="strict") as html_template_file: - html_template = html_template_file.read() - - html = html_template.replace( - "{ /* OPENAPI_SPECIFICATION */ }", - self.get_json_string(pretty_print=pretty_print), - ) - - f.write(html) - -def is_optional_type(type_: Any) -> bool: - """Check if a type is Optional.""" - origin = get_origin(type_) - args = get_args(type_) - return origin is Optional or (origin is Union and type(None) in args) - - -def _validate_api_method_return_type(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if is_optional_type(return_type): - return "returns Optional type where a return value is mandatory" - - -def _validate_api_method_doesnt_return_list(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if get_origin(return_type) is list: - return "returns a list where a PaginatedResponse or List*Response object is expected" - - -def _validate_api_delete_method_returns_none(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - - # Allow OpenAI endpoints to return response objects since they follow OpenAI specification - method_name = getattr(method, '__name__', '') - if method_name.__contains__('openai_'): - return None - - if return_type is not None and return_type is not type(None): - return "does not return None where None is mandatory" - - -def _validate_list_parameters_contain_data(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if not inspect.isclass(return_type): - return - - if not return_type.__name__.startswith('List'): - return - - if 'data' not in return_type.model_fields: - return "does not have a mandatory data attribute containing the list of objects" - - -def _validate_has_ellipsis(method) -> str | None: - source = inspect.getsource(method) - if "..." not in source and not "NotImplementedError" in source: - return "does not contain ellipsis (...) 
in its implementation" - -def _validate_has_return_in_docstring(method) -> str | None: - source = inspect.getsource(method) - return_type = method.__annotations__.get('return') - if return_type is not None and return_type != type(None) and ":returns:" not in source: - return "does not have a ':returns:' in its docstring" - -def _validate_has_params_in_docstring(method) -> str | None: - source = inspect.getsource(method) - sig = inspect.signature(method) - - params_list = [p for p in sig.parameters.values() if p.name != "self"] - if len(params_list) == 1: - param = params_list[0] - param_type = param.annotation - if is_unwrapped_body_param(param_type): - return - - # Only check if the method has more than one parameter - if len(sig.parameters) > 1 and ":param" not in source: - return "does not have a ':param' in its docstring" - -def _validate_has_no_return_none_in_docstring(method) -> str | None: - source = inspect.getsource(method) - return_type = method.__annotations__.get('return') - if return_type is None and ":returns: None" in source: - return "has a ':returns: None' in its docstring which is redundant for None-returning functions" - -def _validate_docstring_lines_end_with_dot(method) -> str | None: - docstring = inspect.getdoc(method) - if docstring is None: - return None - - lines = docstring.split('\n') - for line in lines: - line = line.strip() - if line and not any(line.endswith(char) for char in '.:{}[]()",'): - return f"docstring line '{line}' does not end with a valid character: . : {{ }} [ ] ( ) , \"" - -_VALIDATORS = { - "GET": [ - _validate_api_method_return_type, - _validate_list_parameters_contain_data, - _validate_api_method_doesnt_return_list, - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_docstring_lines_end_with_dot, - ], - "DELETE": [ - _validate_api_delete_method_returns_none, - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_has_no_return_none_in_docstring - ], - "POST": [ - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_has_no_return_none_in_docstring, - _validate_docstring_lines_end_with_dot, - ], -} - - -def _get_methods_by_type(protocol, method_type: str): - members = inspect.getmembers(protocol, predicate=inspect.isfunction) - return { - method_name: method - for method_name, method in members - if (webmethod := getattr(method, '__webmethod__', None)) - if webmethod and webmethod.method == method_type - } - - -def validate_api() -> List[str]: - """Validate the API protocols.""" - errors = [] - protocols = api_protocol_map() - - for target, validators in _VALIDATORS.items(): - for protocol_name, protocol in protocols.items(): - for validator in validators: - for method_name, method in _get_methods_by_type(protocol, target).items(): - err = validator(method) - if err: - errors.append(f"Method {protocol_name}.{method_name} {err}") - - return errors diff --git a/docs/openapi_generator/run_openapi_generator.sh b/docs/openapi_generator/run_openapi_generator.sh deleted file mode 100755 index 6cffd42b0..000000000 --- a/docs/openapi_generator/run_openapi_generator.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
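`validate_api` above is a small conformance harness: a per-HTTP-method registry of check functions, each returning `None` on success or a failure message otherwise, with failures collected into a flat error list. The shape is easy to reuse; a compact sketch with one hypothetical validator:

```python
import inspect

def _validate_has_docstring(method) -> str | None:
    # Hypothetical check in the same shape as the validators above:
    # return None when the method passes, else a failure message.
    if not inspect.getdoc(method):
        return "has no docstring"
    return None

VALIDATORS = {"GET": [_validate_has_docstring]}

class Models:
    def get_model(self, model_id: str): ...

errors = []
for target, validators in VALIDATORS.items():
    for validator in validators:
        err = validator(Models.get_model)
        if err:
            errors.append(f"Method Models.get_model {err}")

print(errors)  # ['Method Models.get_model has no docstring']
```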
- -PYTHONPATH=${PYTHONPATH:-} -THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" - -set -euo pipefail - -missing_packages=() - -check_package() { - if ! pip show "$1" &>/dev/null; then - missing_packages+=("$1") - fi -} - -if [ ${#missing_packages[@]} -ne 0 ]; then - echo "Error: The following package(s) are not installed:" - printf " - %s\n" "${missing_packages[@]}" - echo "Please install them using:" - echo "pip install ${missing_packages[*]}" - exit 1 -fi - -stack_dir=$(dirname $(dirname $THIS_DIR)) -PYTHONPATH=$PYTHONPATH:$stack_dir \ - python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static - -cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index dea2e5bbe..3bc06d7d7 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1,20 +1,44 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Deprecated APIs - version: v1 - description: >- + title: Llama Stack Specification - Deprecated APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for - migration reference only. + **⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for + migration reference only. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1/models: + get: + responses: + '200': + description: A OpenAIListModelsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIListModelsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Openai List Models + description: List models using the OpenAI API. + operationId: openai_list_models_v1_models_get post: responses: '200': @@ -24,23 +48,25 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Register model. - description: >- + - Models + summary: Register Model + description: |- Register model. Register a model. - parameters: [] + operationId: register_model_v1_models_post requestBody: content: application/json: @@ -49,92 +75,215 @@ paths: required: true deprecated: true /v1/models/{model_id}: - delete: + get: responses: '200': - description: OK + description: A Model. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Unregister model. - description: >- + - Models + summary: Get Model + description: |- + Get model. + + Get a model by its identifier. + operationId: get_model_v1_models__model_id__get + parameters: + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Models + summary: Unregister Model + description: |- Unregister model. Unregister a model. + operationId: unregister_model_v1_models__model_id__delete parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. - required: true - schema: - type: string + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' deprecated: true /v1/scoring-functions: - post: + get: responses: '200': - description: OK + description: A ListScoringFunctionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListScoringFunctionsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Register a scoring function. + - Scoring Functions + summary: List Scoring Functions + description: List all scoring functions. + operationId: list_scoring_functions_v1_scoring_functions_get + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Scoring Functions + summary: Register Scoring Function description: Register a scoring function. 
- parameters: [] + operationId: register_scoring_function_v1_scoring_functions_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' + $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: - delete: + get: responses: '200': - description: OK + description: A ScoringFn. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoringFn' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Unregister a scoring function. - description: Unregister a scoring function. + - Scoring Functions + summary: Get Scoring Function + description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Scoring Functions + summary: Unregister Scoring Function + description: Unregister a scoring function. + operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete + parameters: + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' deprecated: true /v1/shields: + get: + responses: + '200': + description: A ListShieldsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListShieldsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: List Shields + description: List all shields. 
+ operationId: list_shields_v1_shields_get post: responses: '200': @@ -144,20 +293,22 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Register a shield. + - Shields + summary: Register Shield description: Register a shield. - parameters: [] + operationId: register_shield_v1_shields_post requestBody: content: application/json: @@ -166,53 +317,114 @@ paths: required: true deprecated: true /v1/shields/{identifier}: + get: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Get Shield + description: Get a shield by its identifier. + operationId: get_shield_v1_shields__identifier__get + parameters: + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Shields - summary: Unregister a shield. + - Shields + summary: Unregister Shield description: Unregister a shield. + operationId: unregister_shield_v1_shields__identifier__delete parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' deprecated: true /v1/toolgroups: - post: + get: responses: '200': - description: OK + description: A ListToolGroupsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolGroupsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Register a tool group. 
+ - Tool Groups + summary: List Tool Groups + description: List tool groups with optional provider. + operationId: list_tool_groups_v1_toolgroups_get + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Tool Groups + summary: Register Tool Group description: Register a tool group. - parameters: [] + operationId: register_tool_group_v1_toolgroups_post requestBody: content: application/json: @@ -221,33 +433,93 @@ paths: required: true deprecated: true /v1/toolgroups/{toolgroup_id}: - delete: + get: responses: '200': - description: OK + description: A ToolGroup. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolGroup' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Unregister a tool group. - description: Unregister a tool group. + - Tool Groups + summary: Get Tool Group + description: Get a tool group by its ID. + operationId: get_tool_group_v1_toolgroups__toolgroup_id__get parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Tool Groups + summary: Unregister Toolgroup + description: Unregister a tool group. + operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete + parameters: + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' deprecated: true /v1beta/datasets: + get: + responses: + '200': + description: A ListDatasetsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListDatasetsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: List Datasets + description: List all datasets. 
+ operationId: list_datasets_v1beta_datasets_get post: responses: '200': @@ -257,74 +529,138 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Register a new dataset. + - Datasets + summary: Register Dataset description: Register a new dataset. - parameters: [] + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequest' + $ref: '#/components/schemas/RegisterDatasetRequestLoose' required: true deprecated: true /v1beta/datasets/{dataset_id}: + get: + responses: + '200': + description: A Dataset. + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: Get Dataset + description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get + parameters: + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Datasets - summary: Unregister a dataset by its ID. + - Datasets + summary: Unregister Dataset description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' deprecated: true /v1alpha/eval/benchmarks: - post: + get: responses: '200': - description: OK + description: A ListBenchmarksResponse. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Register a benchmark. + - Benchmarks + summary: List Benchmarks + description: List all benchmarks. + operationId: list_benchmarks_v1alpha_eval_benchmarks_get + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Benchmarks + summary: Register Benchmark description: Register a benchmark. - parameters: [] + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: @@ -333,788 +669,9557 @@ paths: required: true deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: - delete: + get: responses: '200': - description: OK + description: A Benchmark. + content: + application/json: + schema: + $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Unregister a benchmark. - description: Unregister a benchmark. + - Benchmarks + summary: Get Benchmark + description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Benchmarks + summary: Unregister Benchmark + description: Unregister a benchmark. 
+ operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete + parameters: + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' deprecated: true -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - ModelType: + type: object + ListBatchesResponse: + properties: + object: + type: string + const: list + title: Object + default: list + data: + items: + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list + has_more: + type: boolean + title: Has More + description: Whether there are more batches available + default: false + type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. 
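+    # Illustrative ListBatchesResponse payload, sketched here as a YAML comment
+    # (the batch IDs are hypothetical, not part of the generated spec):
+    #   {"object": "list", "data": [{"id": "batch_abc123", ...}],
+    #    "first_id": "batch_abc123", "last_id": "batch_abc123", "has_more": false}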
+ CreateBatchRequest: + properties: + input_file_id: + type: string + title: Input File Id + endpoint: + type: string + title: Endpoint + completion_window: + type: string + const: 24h + title: Completion Window + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + idempotency_key: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + properties: + id: + type: string + title: Id + completion_window: + type: string + title: Completion Window + created_at: + type: integer + title: Created At + endpoint: + type: string + title: Endpoint + input_file_id: + type: string + title: Input File Id + object: + type: string + const: batch + title: Object + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true + type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + Order: type: string enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - RegisterModelRequest: - type: object + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: properties: - model_id: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: type: string - description: The identifier of the model to register. - provider_model_id: + title: First Id + last_id: type: string - description: >- - The identifier of the model in the provider. - provider_id: + title: Last Id + object: type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. 
- additionalProperties: false - required: - - model_id - title: RegisterModelRequest - Model: + const: list + title: Object + default: list type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + properties: + role: + const: assistant + default: assistant + title: Role + type: string + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + title: OpenAIAssistantMessageParam + type: object + OpenAIChatCompletionContentPartImageParam: + properties: + type: + type: string + const: image_url + title: Type + default: image_url + image_url: + $ref: '#/components/schemas/OpenAIImageURL' + type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: + properties: + type: + type: string + const: text + title: Type + default: text + text: + type: string + title: Text + type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: + properties: + index: + anyOf: + - type: integer + - type: 'null' + id: + anyOf: + - type: string + - type: 'null' + type: + type: string + const: function + title: Type + default: function + function: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction + type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: + properties: + name: + anyOf: + - type: string + - type: 'null' + arguments: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. 
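+    # Illustrative OpenAIChatCompletionToolCall value, sketched as a YAML comment
+    # (the id, function name, and arguments are hypothetical):
+    #   {"index": 0, "id": "call_abc123", "type": "function",
+    #    "function": {"name": "get_weather", "arguments": "{\"city\": \"Paris\"}"}}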
+ OpenAIChatCompletionUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + completion_tokens: + type: integer + title: Completion Tokens + total_tokens: + type: integer + title: Total Tokens + prompt_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails + completion_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails + type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: + properties: + message: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + finish_reason: + type: string + title: Finish Reason + index: + type: integer + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: + properties: + content: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + refusal: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: + properties: + role: + type: string + const: developer + title: Role + default: developer + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. 
+ OpenAIFile: + properties: + type: + type: string + const: file + title: Type + default: file + file: + $ref: '#/components/schemas/OpenAIFileFile' + type: object + required: + - file + title: OpenAIFile + OpenAIFileFile: + properties: + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIFileFile + OpenAIImageURL: + properties: + url: + type: string + title: Url + detail: + anyOf: + - type: string + - type: 'null' + type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. + OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + top_logprobs: + items: + $ref: '#/components/schemas/OpenAITopLogProb' + type: array + title: Top Logprobs + type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: + properties: + role: + type: string + const: tool + title: Role + default: tool + tool_call_id: + type: string + title: Tool Call Id + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. 
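+    # Illustrative OpenAIToolMessageParam value returning a tool result to the model,
+    # sketched as a YAML comment (tool_call_id and content are hypothetical):
+    #   {"role": "tool", "tool_call_id": "call_abc123",
+    #    "content": "{\"temperature\": 21}"}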
+ OpenAITopLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + title: OpenAIUserMessageParam + type: object + OpenAIJSONSchema: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: + properties: + type: + type: string + const: json_object + title: Type + default: json_object + type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: + properties: + type: + type: string + const: json_schema + title: Type + default: json_schema + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. 
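+    # Illustrative response_format value selecting the json_schema variant, sketched
+    # as a YAML comment (the schema name and body are hypothetical):
+    #   {"type": "json_schema", "json_schema": {"name": "weather_report",
+    #    "strict": true, "schema": {"type": "object"}}}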
+ OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: + properties: + type: + type: string + const: text + title: Type + default: text + type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: + properties: + model: + type: string + title: Model + messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... 
(5 variants) + type: array + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + top_logprobs: + anyOf: + - type: integer + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. + OpenAIChatCompletion: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. 
+ properties: + id: + title: Id + type: string + choices: + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + title: Choices + type: array + object: + const: chat.completion.chunk + default: chat.completion.chunk + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model + type: string + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage + required: + - id + - choices + - created + - model + title: OpenAIChatCompletionChunk + type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. + properties: + content: + anyOf: + - type: string + - type: 'null' + nullable: true + refusal: + anyOf: + - type: string + - type: 'null' + nullable: true + role: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + reasoning_content: + anyOf: + - type: string + - type: 'null' + nullable: true + title: OpenAIChoiceDelta + type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. + properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + finish_reason: + title: Finish Reason + type: string + index: + title: Index + type: integer + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs + required: + - delta + - finish_reason + - index + title: OpenAIChunkChoice + type: object + OpenAICompletionWithInputMessages: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + input_messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... 
(5 variants) + type: array + title: Input Messages + type: object + required: + - id + - choices + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + OpenAICompletionRequestWithExtraBody: + properties: + model: + type: string + title: Model + prompt: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: + items: + type: integer + type: array + type: array + title: list[array] + title: string | ... (4 variants) + best_of: + anyOf: + - type: integer + - type: 'null' + echo: + anyOf: + - type: boolean + - type: 'null' + frequency_penalty: + anyOf: + - type: number + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + suffix: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices + created: + type: integer + title: Created + model: + type: string + title: Model + object: + type: string + const: text_completion + title: Object + default: text_completion + type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. + + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: + properties: + finish_reason: + type: string + title: Finish Reason + text: + type: string + title: Text + index: + type: integer + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: |- + A choice from an OpenAI-compatible completion response. 
+ + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice + ConversationItem: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + OpenAIResponseAnnotationCitation: + properties: + type: + type: string + const: url_citation + title: Type + default: url_citation + end_index: + type: integer + title: End Index + start_index: + type: integer + title: Start Index + title: + type: string + title: Title + url: + type: string + title: Url + type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. 
+ OpenAIResponseAnnotationContainerFileCitation: + properties: + type: + type: string + const: container_file_citation + title: Type + default: container_file_citation + container_id: + type: string + title: Container Id + end_index: + type: integer + title: End Index + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + start_index: + type: integer + title: Start Index + type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + properties: + type: + type: string + const: file_citation + title: Type + default: file_citation + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + index: + type: integer + title: Index + type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. + OpenAIResponseAnnotationFilePath: + properties: + type: + type: string + const: file_path + title: Type + default: file_path + file_id: + type: string + title: File Id + index: + type: integer + title: Index + type: object + required: + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + OpenAIResponseContentPartRefusal: + properties: + type: + type: string + const: refusal + title: Type + default: refusal + refusal: + type: string + title: Refusal + type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: + properties: + call_id: + type: string + title: Call Id + output: + type: string + title: Output + type: + type: string + const: function_call_output + title: Type + default: function_call_output + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. 
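+    # Illustrative OpenAIResponseInputFunctionToolCallOutput value, sketched as a
+    # YAML comment (call_id and output are hypothetical):
+    #   {"type": "function_call_output", "call_id": "call_abc123",
+    #    "output": "{\"temperature\": 21}"}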
+ OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: + properties: + detail: + title: Detail + default: auto + type: string + enum: + - low + - high + - auto + type: + type: string + const: input_image + title: Type + default: input_image + file_id: + anyOf: + - type: string + - type: 'null' + image_url: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + properties: + text: + type: string + title: Text + type: + type: string + const: input_text + title: Type + default: input_text + type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: + properties: + arguments: + type: string + title: Arguments + id: + type: string + title: Id + name: + type: string + title: Name + server_label: + type: string + title: Server Label + type: + type: string + const: mcp_approval_request + title: Type + default: mcp_approval_request + type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + properties: + approval_request_id: + type: string + title: Approval Request Id + approve: + type: boolean + title: Approve + type: + type: string + const: mcp_approval_response + title: Type + default: mcp_approval_response + id: + anyOf: + - type: string + - type: 'null' + reason: + anyOf: + - type: string + - type: 'null' + type: object + required: + - approval_request_id + - approve + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+      properties:
+        content:
+          anyOf:
+            - type: string
+            - items:
+                discriminator:
+                  mapping:
+                    input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                  propertyName: type
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                    title: OpenAIResponseInputMessageContentText
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    title: OpenAIResponseInputMessageContentImage
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    title: OpenAIResponseInputMessageContentFile
+                title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+              type: array
+              title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile]
+            - items:
+                discriminator:
+                  mapping:
+                    output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                    refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+                  propertyName: type
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                    title: OpenAIResponseOutputMessageContentOutputText
+                  - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+                    title: OpenAIResponseContentPartRefusal
+                title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal
+              type: array
+              title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+          title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+        role:
+          title: Role
+          type: string
+          enum:
+            - system
+            - developer
+            - user
+            - assistant
+          default: system
+        type:
+          const: message
+          default: message
+          title: Type
+          type: string
+        id:
+          anyOf:
+            - type: string
+            - type: 'null'
+          nullable: true
+        status:
+          anyOf:
+            - type: string
+            - type: 'null'
+          nullable: true
+      required:
+        - content
+        - role
+      title: OpenAIResponseMessage
+      type: object
+    OpenAIResponseOutputMessageContent:
+      discriminator:
+        mapping:
+          output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+          refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+          title: OpenAIResponseOutputMessageContentOutputText
+        - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+          title: OpenAIResponseContentPartRefusal
+      title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal
+    OpenAIResponseOutputMessageContentOutputText:
+      properties:
+        text:
+          type: string
+          title: Text
+        type:
+          type: string
+          const: output_text
+          title: Type
+          default: output_text
+        annotations:
+          items:
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                title: OpenAIResponseAnnotationFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+                title: OpenAIResponseAnnotationCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                title: OpenAIResponseAnnotationContainerFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                title: OpenAIResponseAnnotationFilePath
+            discriminator:
+              propertyName: type
+              mapping:
+                container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+          type: array
+          title: Annotations
+      type: object
+      required:
+        - text
+      title: OpenAIResponseOutputMessageContentOutputText
+    OpenAIResponseOutputMessageFileSearchToolCall:
+      properties:
+        id:
+          type: string
+          title: Id
+        queries:
+          items:
+            type: string
+          type: array
+          title: Queries
+        status:
+          type: string
+          title: Status
+        type:
+          type: string
+          const: file_search_call
+          title: Type
+          default: file_search_call
+        results:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults'
+              type: array
+            - type: 'null'
+      type: object
+      required:
+        - id
+        - queries
+        - status
+      title: OpenAIResponseOutputMessageFileSearchToolCall
+      description: File search tool call output message for OpenAI responses.
+    OpenAIResponseOutputMessageFunctionToolCall:
+      properties:
+        call_id:
+          type: string
+          title: Call Id
+        name:
+          type: string
+          title: Name
+        arguments:
+          type: string
+          title: Arguments
+        type:
+          type: string
+          const: function_call
+          title: Type
+          default: function_call
+        id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        status:
+          anyOf:
+            - type: string
+            - type: 'null'
+      type: object
+      required:
+        - call_id
+        - name
+        - arguments
+      title: OpenAIResponseOutputMessageFunctionToolCall
+      description: Function tool call output message for OpenAI responses.
+    OpenAIResponseOutputMessageMCPCall:
+      properties:
+        id:
+          type: string
+          title: Id
+        type:
+          type: string
+          const: mcp_call
+          title: Type
+          default: mcp_call
+        arguments:
+          type: string
+          title: Arguments
+        name:
+          type: string
+          title: Name
+        server_label:
+          type: string
+          title: Server Label
+        error:
+          anyOf:
+            - type: string
+            - type: 'null'
+        output:
+          anyOf:
+            - type: string
+            - type: 'null'
+      type: object
+      required:
+        - id
+        - arguments
+        - name
+        - server_label
+      title: OpenAIResponseOutputMessageMCPCall
+      description: Model Context Protocol (MCP) call output message for OpenAI responses.
+    OpenAIResponseOutputMessageMCPListTools:
+      properties:
+        id:
+          type: string
+          title: Id
+        type:
+          type: string
+          const: mcp_list_tools
+          title: Type
+          default: mcp_list_tools
+        server_label:
+          type: string
+          title: Server Label
+        tools:
+          items:
+            $ref: '#/components/schemas/MCPListToolsTool'
+          type: array
+          title: Tools
+      type: object
+      required:
+        - id
+        - server_label
+        - tools
+      title: OpenAIResponseOutputMessageMCPListTools
+      description: MCP list tools output message containing available tools from an MCP server.
+    OpenAIResponseOutputMessageWebSearchToolCall:
+      properties:
+        id:
+          type: string
+          title: Id
+        status:
+          type: string
+          title: Status
+        type:
+          type: string
+          const: web_search_call
+          title: Type
+          default: web_search_call
+      type: object
+      required:
+        - id
+        - status
+      title: OpenAIResponseOutputMessageWebSearchToolCall
+      description: Web search tool call output message for OpenAI responses.
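+    # Illustrative output_text content part carrying a file_citation annotation,
+    # per the OpenAIResponseOutputMessageContentOutputText schema above (the file
+    # id, filename, and text are hypothetical):
+    #   type: output_text
+    #   text: See the quarterly report for details.
+    #   annotations:
+    #     - type: file_citation
+    #       file_id: file-abc123
+    #       filename: q3_report.pdf
+    #       index: 0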
+ CreateConversationRequest: + properties: + items: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object + title: CreateConversationRequest + Conversation: + properties: + id: + type: string + title: Id + description: The unique ID of the conversation. + object: + type: string + const: conversation + title: Object + description: The object type, which is always conversation. + default: conversation + created_at: + type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object + required: + - id + - created_at + title: Conversation + description: OpenAI-compatible conversation object. 
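+    # Illustrative Conversation object matching the schema above (the id,
+    # timestamp, and metadata values are hypothetical):
+    #   id: conv_123
+    #   object: conversation
+    #   created_at: 1730000000
+    #   metadata:
+    #     topic: demo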
+ UpdateConversationRequest: + properties: + metadata: + additionalProperties: + type: string + type: object + title: Metadata + type: object + required: + - metadata + title: UpdateConversationRequest + ConversationDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted conversation identifier + object: + type: string + title: Object + description: Object type + default: conversation.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationDeletedResource + description: Response for deleted conversation. + ConversationItemList: + properties: + object: + type: string + title: Object + description: Object type + default: list + data: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items + first_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list + has_more: + type: boolean + title: Has More + description: Whether there are more items available + default: false + type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. 
+ AddItemsRequest: + properties: + items: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object + required: + - items + title: AddItemsRequest + ConversationItemDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted item identifier + object: + type: string + title: Object + description: Object type + default: conversation.item.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationItemDeletedResource + description: Response for deleted conversation item. + OpenAIEmbeddingsRequestWithExtraBody: + properties: + model: + type: string + title: Model + input: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + encoding_format: + anyOf: + - type: string + - type: 'null' + default: float + dimensions: + anyOf: + - type: integer + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. 
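+    # Minimal embeddings request body matching OpenAIEmbeddingsRequestWithExtraBody
+    # above (the model name and input strings are hypothetical):
+    #   model: text-embedding-example
+    #   input:
+    #     - first sentence to embed
+    #     - second sentence to embed
+    #   encoding_format: float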
+ OpenAIEmbeddingData: + properties: + object: + type: string + const: embedding + title: Object + default: embedding + embedding: + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string + index: + type: integer + title: Index + type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + total_tokens: + type: integer + title: Total Tokens + type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: + properties: + object: + type: string + const: list + title: Object + default: list + data: + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + type: array + title: Data + model: + type: string + title: Model + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + type: object + required: + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: Response from an OpenAI-compatible embeddings request. + OpenAIFilePurpose: + type: string + enum: + - assistants + - batch + title: OpenAIFilePurpose + description: Valid purpose values for OpenAI Files API. + ListOpenAIFileResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIFileObject' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: + properties: + object: + type: string + const: file + title: Object + default: file + id: + type: string + title: Id + bytes: + type: integer + title: Bytes + created_at: + type: integer + title: Created At + expires_at: + type: integer + title: Expires At + filename: + type: string + title: Filename + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: + properties: + anchor: + type: string + const: created_at + title: Anchor + seconds: + type: integer + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object + required: + - anchor + - seconds + title: ExpiresAfter + description: |- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + OpenAIFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + const: file + title: Object + default: file + deleted: + type: boolean + title: Deleted + type: object + required: + - id + - deleted + title: OpenAIFileDeleteResponse + description: Response for deleting a file in OpenAI Files API. + Response: + title: Response + type: object + HealthInfo: + properties: + status: + $ref: '#/components/schemas/HealthStatus' + type: object + required: + - status + title: HealthInfo + description: Health status information for the service. 
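+    # Illustrative ExpiresAfter value for a file upload, per the schema above:
+    # anchor must be created_at and seconds must fall within [3600, 2592000]
+    # (the 24-hour value below is hypothetical):
+    #   anchor: created_at
+    #   seconds: 86400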
+ RouteInfo: + properties: + route: + type: string + title: Route + method: + type: string + title: Method + provider_types: + items: + type: string + type: array + title: Provider Types + type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: + properties: + data: + items: + $ref: '#/components/schemas/RouteInfo' + type: array + title: Data + type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. + + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: Enumeration of supported model types in Llama Stack. 
+ RunModerationRequest: + properties: + input: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + model: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input + title: RunModerationRequest + ModerationObject: + properties: + id: + type: string + title: Id + model: + type: string + title: Model + results: + items: + $ref: '#/components/schemas/ModerationObjectResults' + type: array + title: Results + type: object + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + properties: + flagged: + type: boolean + title: Flagged + categories: + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' + category_applied_input_types: + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' + category_scores: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - flagged + title: ModerationObjectResults + description: A moderation object. + Prompt: + properties: + prompt: + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders + version: + type: integer + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) + prompt_id: + type: string + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' + variables: + items: + type: string + type: array + title: Variables + description: List of variable names that can be used in the prompt template + is_default: + type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version + default: false + type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + ListPromptsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + type: object + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. 
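+    # Sketch of a ModerationObjectResults entry, per the schema above (the
+    # category name and score are hypothetical; only flagged is required):
+    #   flagged: true
+    #   categories:
+    #     violence: true
+    #   category_scores:
+    #     violence: 0.91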
+ CreatePromptRequest: + properties: + prompt: + type: string + title: Prompt + variables: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + required: + - prompt + title: CreatePromptRequest + UpdatePromptRequest: + properties: + prompt: + type: string + title: Prompt + version: + type: integer + title: Version + variables: + anyOf: + - items: + type: string + type: array + - type: 'null' + set_as_default: + type: boolean + title: Set As Default + default: true + type: object + required: + - prompt + - version + title: UpdatePromptRequest + SetDefaultVersionRequest: + properties: + version: + type: integer + title: Version + type: object + required: + - version + title: SetDefaultVersionRequest + ProviderInfo: + properties: + api: + type: string + title: Api + provider_id: + type: string + title: Provider Id + provider_type: + type: string + title: Provider Type + config: + additionalProperties: true + type: object + title: Config + health: + additionalProperties: true + type: object + title: Health + type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: + properties: + data: + items: + $ref: '#/components/schemas/ProviderInfo' + type: array + title: Data + type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: + properties: + code: + type: string + title: Code + message: + type: string + title: Message + type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. 
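+    # Illustrative CreatePromptRequest body, per the schema above (the template
+    # text, placeholder syntax, and variable names are hypothetical):
+    #   prompt: 'Summarize {{ document }} in a {{ tone }} tone.'
+    #   variables:
+    #     - document
+    #     - tone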
+ OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: + properties: + type: + type: string + const: file_search + title: Type + default: file_search + vector_store_ids: + items: + type: string + type: array + title: Vector Store Ids + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: + properties: + type: + type: string + const: function + title: Type + default: function + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + parameters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
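+    # Illustrative function tool definition per OpenAIResponseInputToolFunction
+    # above (the tool name and JSON Schema parameters are hypothetical):
+    #   type: function
+    #   name: get_weather
+    #   description: Look up current weather for a city.
+    #   parameters:
+    #     type: object
+    #     properties:
+    #       city:
+    #         type: string
+    #     required:
+    #       - city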
+    OpenAIResponseInputToolWebSearch:
+      properties:
+        type:
+          title: Type
+          default: web_search
+          type: string
+          enum:
+            - web_search
+            - web_search_preview
+            - web_search_preview_2025_03_11
+            - web_search_2025_08_26
+        search_context_size:
+          anyOf:
+            - type: string
+              pattern: ^low|medium|high$
+            - type: 'null'
+          default: medium
+      type: object
+      title: OpenAIResponseInputToolWebSearch
+      description: Web search tool configuration for OpenAI response inputs.
+    OpenAIResponseObjectWithInput:
+      properties:
+        created_at:
+          type: integer
+          title: Created At
+        error:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseError'
+              title: OpenAIResponseError
+            - type: 'null'
+          title: OpenAIResponseError
+        id:
+          type: string
+          title: Id
+        model:
+          type: string
+          title: Model
+        object:
+          type: string
+          const: response
+          title: Object
+          default: response
+        output:
+          items:
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
+        parallel_tool_calls:
+          type: boolean
+          title: Parallel Tool Calls
+          default: false
+        previous_response_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        prompt:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponsePrompt'
+              title: OpenAIResponsePrompt
+            - type: 'null'
+          title: OpenAIResponsePrompt
+        status:
+          type: string
+          title: Status
+        temperature:
+          anyOf:
+            - type: number
+            - type: 'null'
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
+          default:
+            format:
+              type: text
+        top_p:
+          anyOf:
+            - type: number
+            - type: 'null'
+        tools:
+          anyOf:
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    title: OpenAIResponseInputToolWebSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    title: OpenAIResponseInputToolFileSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    title: OpenAIResponseInputToolFunction
+                  - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                    title: OpenAIResponseToolMCP
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                    web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+              type: array
+            - type: 'null'
+        truncation:
+          anyOf:
+            - type: string
+            - type: 'null'
+        usage:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseUsage'
+              title: OpenAIResponseUsage
+            - type: 'null'
+          title: OpenAIResponseUsage
+        instructions:
+          anyOf:
+            - type: string
+            - type: 'null'
+        max_tool_calls:
+          anyOf:
+            - type: integer
+            - type: 'null'
+        input:
+          items:
+            anyOf:
+              - oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                    title: OpenAIResponseMessage-Output
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                    title: OpenAIResponseOutputMessageWebSearchToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    title: OpenAIResponseOutputMessageFileSearchToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    title: OpenAIResponseOutputMessageFunctionToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    title: OpenAIResponseOutputMessageMCPCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    title: OpenAIResponseOutputMessageMCPListTools
+                  - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    title: OpenAIResponseMCPApprovalRequest
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    message: '#/components/schemas/OpenAIResponseMessage-Output'
+                    web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseMessage-Output | ... (7 variants)
+              - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                title: OpenAIResponseInputFunctionToolCallOutput
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                title: OpenAIResponseMCPApprovalResponse
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Input
+      type: object
+      required:
+        - created_at
+        - id
+        - model
+        - output
+        - status
+        - input
+      title: OpenAIResponseObjectWithInput
+      description: OpenAI response object extended with input context information.
+    OpenAIResponseOutput:
+      discriminator:
+        mapping:
+          file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          message: '#/components/schemas/OpenAIResponseMessage'
+          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseMessage'
+          title: OpenAIResponseMessage
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+          title: OpenAIResponseOutputMessageWebSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          title: OpenAIResponseOutputMessageFileSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          title: OpenAIResponseOutputMessageFunctionToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          title: OpenAIResponseOutputMessageMCPCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          title: OpenAIResponseOutputMessageMCPListTools
+        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          title: OpenAIResponseMCPApprovalRequest
+      title: OpenAIResponseMessage | ... (7 variants)
+    OpenAIResponsePrompt:
+      properties:
+        id:
+          type: string
+          title: Id
+        variables:
+          anyOf:
+            - additionalProperties:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                    title: OpenAIResponseInputMessageContentText
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    title: OpenAIResponseInputMessageContentImage
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    title: OpenAIResponseInputMessageContentFile
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+              type: object
+            - type: 'null'
+        version:
+          anyOf:
+            - type: string
+            - type: 'null'
+      type: object
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: OpenAI compatible Prompt object that is used in OpenAI responses.
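+    # Illustrative OpenAIResponsePrompt reference per the schema above (the
+    # prompt id, version, and variable values are hypothetical):
+    #   id: pmpt_123
+    #   version: '2'
+    #   variables:
+    #     city:
+    #       type: input_text
+    #       text: Tokyo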
+ OpenAIResponseText: + properties: + format: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object + title: OpenAIResponseText + description: Text response configuration for OpenAI responses. + OpenAIResponseTool: + discriminator: + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + OpenAIResponseToolMCP: + properties: + type: + type: string + const: mcp + title: Type + default: mcp + server_label: + type: string + title: Server Label + allowed_tools: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter + type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. + OpenAIResponseUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + output_tokens: + type: integer + title: Output Tokens + total_tokens: + type: integer + title: Total Tokens + input_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails + output_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object + required: + - input_tokens + - output_tokens + - total_tokens + title: OpenAIResponseUsage + description: Usage information for OpenAI response. + ResponseGuardrailSpec: + description: Specification for a guardrail to apply during response generation. 
+ properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + OpenAIResponseInputTool: + discriminator: + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + OpenAIResponseInputToolMCP: + properties: + type: + type: string + const: mcp + title: Type + default: mcp + server_label: + type: string + title: Server Label + server_url: + type: string + title: Server Url + headers: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + authorization: + anyOf: + - type: string + - type: 'null' + require_approval: + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter + default: never + allowed_tools: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter + type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. 
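+    # Illustrative mcp tool configuration per OpenAIResponseInputToolMCP above
+    # (the server_label, server_url, and tool filter are hypothetical):
+    #   type: mcp
+    #   server_label: docs
+    #   server_url: https://mcp.example.com/sse
+    #   require_approval: never
+    #   allowed_tools:
+    #     - search_docs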
+ CreateOpenaiResponseRequest: + properties: + input: + anyOf: + - type: string + - items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input + type: array + title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] + title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] 
+        model:
+          type: string
+          title: Model
+        prompt:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponsePrompt'
+              title: OpenAIResponsePrompt
+            - type: 'null'
+          title: OpenAIResponsePrompt
+        instructions:
+          anyOf:
+            - type: string
+            - type: 'null'
+        previous_response_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        conversation:
+          anyOf:
+            - type: string
+            - type: 'null'
+        store:
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
+        stream:
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: false
+        temperature:
+          anyOf:
+            - type: number
+            - type: 'null'
+        text:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseText'
+              title: OpenAIResponseText
+            - type: 'null'
+          title: OpenAIResponseText
+        tools:
+          anyOf:
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    title: OpenAIResponseInputToolWebSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    title: OpenAIResponseInputToolFileSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    title: OpenAIResponseInputToolFunction
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+                    title: OpenAIResponseInputToolMCP
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+                    web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+              type: array
+            - type: 'null'
+        include:
+          anyOf:
+            - items:
+                type: string
+              type: array
+            - type: 'null'
+        max_infer_iters:
+          anyOf:
+            - type: integer
+            - type: 'null'
+          default: 10
+        max_tool_calls:
+          anyOf:
+            - type: integer
+            - type: 'null'
+      type: object
+      required:
+        - input
+        - model
+      title: CreateOpenaiResponseRequest
+    OpenAIResponseObject:
+      properties:
+        created_at:
+          type: integer
+          title: Created At
+        error:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseError'
+              title: OpenAIResponseError
+            - type: 'null'
+          title: OpenAIResponseError
+        id:
+          type: string
+          title: Id
+        model:
+          type: string
+          title: Model
+        object:
+          type: string
+          const: response
+          title: Object
+          default: response
+        output:
+          items:
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
+        parallel_tool_calls:
+          type: boolean
+          title: Parallel Tool Calls
+          default: false
+        previous_response_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        prompt:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponsePrompt'
+              title: OpenAIResponsePrompt
+            - type: 'null'
+          title: OpenAIResponsePrompt
+        status:
+          type: string
+          title: Status
+        temperature:
+          anyOf:
+            - type: number
+            - type: 'null'
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
+          default:
+            format:
+              type: text
+        top_p:
+          anyOf:
+            - type: number
+            - type: 'null'
+        tools:
+          anyOf:
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    title: OpenAIResponseInputToolWebSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    title: OpenAIResponseInputToolFileSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    title: OpenAIResponseInputToolFunction
+                  - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                    title: OpenAIResponseToolMCP
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                    web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+              type: array
+            - type: 'null'
+        truncation:
+          anyOf:
+            - type: string
+            - type: 'null'
+        usage:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseUsage'
+              title: OpenAIResponseUsage
+            - type: 'null'
+          title: OpenAIResponseUsage
+        instructions:
+          anyOf:
+            - type: string
+            - type: 'null'
+        max_tool_calls:
+          anyOf:
+            - type: integer
+            - type: 'null'
+      type: object
+      required:
+        - created_at
+        - id
+        - model
+        - output
+        - status
+      title: OpenAIResponseObject
+      description: Complete OpenAI response object containing generation results and metadata.
+    OpenAIResponseContentPartOutputText:
+      description: Text content within a streamed response part.
+      properties:
+        type:
+          const: output_text
+          default: output_text
+          title: Type
+          type: string
+        text:
+          title: Text
+          type: string
+        annotations:
+          items:
+            discriminator:
+              mapping:
+                container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              propertyName: type
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                title: OpenAIResponseAnnotationFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+                title: OpenAIResponseAnnotationCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                title: OpenAIResponseAnnotationContainerFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                title: OpenAIResponseAnnotationFilePath
+            title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+          title: Annotations
+          type: array
+        logprobs:
+          anyOf:
+            - items:
+                additionalProperties: true
+                type: object
+              type: array
+            - type: 'null'
+          nullable: true
+      required:
+        - text
+      title: OpenAIResponseContentPartOutputText
+      type: object
+    OpenAIResponseContentPartReasoningSummary:
+      description: Reasoning summary part in a streamed response.
+      properties:
+        type:
+          const: summary_text
+          default: summary_text
+          title: Type
+          type: string
+        text:
+          title: Text
+          type: string
+      required:
+        - text
+      title: OpenAIResponseContentPartReasoningSummary
+      type: object
+    OpenAIResponseContentPartReasoningText:
+      description: Reasoning text emitted as part of a streamed response.
+      properties:
+        type:
+          const: reasoning_text
+          default: reasoning_text
+          title: Type
+          type: string
+        text:
+          title: Text
+          type: string
+      required:
+        - text
+      title: OpenAIResponseContentPartReasoningText
+      type: object
+    OpenAIResponseObjectStream:
+      discriminator:
+        mapping:
+          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+          response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+          response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+
response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + 
title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. 
+ properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + const: response.completed + default: response.completed + title: Type + type: string + required: + - response + title: OpenAIResponseObjectStreamResponseCompleted + type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. + properties: + content_index: + title: Content Index + type: integer + response_id: + title: Response Id + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + sequence_number: + title: Sequence Number + type: integer + type: + const: response.content_part.added + default: response.content_part.added + title: Type + type: string + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartAdded + type: object + OpenAIResponseObjectStreamResponseContentPartDone: + description: Streaming event for when a content part is completed. + properties: + content_index: + title: Content Index + type: integer + response_id: + title: Response Id + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + sequence_number: + title: Sequence Number + type: integer + type: + const: response.content_part.done + default: response.content_part.done + title: Type + type: string + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartDone + type: object + OpenAIResponseObjectStreamResponseCreated: + description: Streaming event indicating a new response has been created. 
+ properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + const: response.created + default: response.created + title: Type + type: string + required: + - response + title: OpenAIResponseObjectStreamResponseCreated + type: object + OpenAIResponseObjectStreamResponseFailed: + description: Streaming event emitted when a response fails. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.failed + default: response.failed + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseFailed + type: object + OpenAIResponseObjectStreamResponseFileSearchCallCompleted: + description: Streaming event for completed file search calls. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.completed + default: response.file_search_call.completed + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + type: object + OpenAIResponseObjectStreamResponseFileSearchCallInProgress: + description: Streaming event for file search calls in progress. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.in_progress + default: response.file_search_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + type: object + OpenAIResponseObjectStreamResponseFileSearchCallSearching: + description: Streaming event for file search currently searching. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.searching + default: response.file_search_call.searching + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta: + description: Streaming event for incremental function call argument updates. + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.function_call_arguments.delta + default: response.function_call_arguments.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone: + description: Streaming event for when function call arguments are completed. 
+ properties: + arguments: + title: Arguments + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.function_call_arguments.done + default: response.function_call_arguments.done + title: Type + type: string + required: + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.in_progress + default: response.in_progress + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress + type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.incomplete + default: response.incomplete + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete + type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.arguments.delta + default: response.mcp_call.arguments.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: + properties: + arguments: + title: Arguments + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.arguments.done + default: response.mcp_call.arguments.done + title: Type + type: string + required: + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.completed + default: response.mcp_call.completed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.failed + default: response.mcp_call.failed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed + type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.in_progress + default: response.mcp_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + type: object + OpenAIResponseObjectStreamResponseMcpListToolsCompleted: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.completed + default: response.mcp_list_tools.completed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + type: object + OpenAIResponseObjectStreamResponseMcpListToolsFailed: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.failed + default: response.mcp_list_tools.failed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + type: object + OpenAIResponseObjectStreamResponseMcpListToolsInProgress: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.in_progress + default: response.mcp_list_tools.in_progress + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + type: object + OpenAIResponseObjectStreamResponseOutputItemAdded: + description: Streaming event for when a new output item is added to the response. + properties: + response_id: + title: Response Id + type: string + item: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_item.added + default: response.output_item.added + title: Type + type: string + required: + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemAdded + type: object + OpenAIResponseObjectStreamResponseOutputItemDone: + description: Streaming event for when an output item is completed. + properties: + response_id: + title: Response Id + type: string + item: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_item.done + default: response.output_item.done + title: Type + type: string + required: + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemDone + type: object + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded: + description: Streaming event for when an annotation is added to output text. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + content_index: + title: Content Index + type: integer + annotation_index: + title: Annotation Index + type: integer + annotation: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.annotation.added + default: response.output_text.annotation.added + title: Type + type: string + required: + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + type: object + OpenAIResponseObjectStreamResponseOutputTextDelta: + description: Streaming event for incremental text content updates. + properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.delta + default: response.output_text.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDelta + type: object + OpenAIResponseObjectStreamResponseOutputTextDone: + description: Streaming event for when text output is completed. + properties: + content_index: + title: Content Index + type: integer + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.done + default: response.output_text.done + title: Type + type: string + required: + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDone + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded: + description: Streaming event for when a new reasoning summary part is added. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_part.added + default: response.reasoning_summary_part.added + title: Type + type: string + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone: + description: Streaming event for when a reasoning summary part is completed. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_part.done + default: response.reasoning_summary_part.done + title: Type + type: string + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta: + description: Streaming event for incremental reasoning summary text updates. + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_text.delta + default: response.reasoning_summary_text.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone: + description: Streaming event for when reasoning summary text is completed. + properties: + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_text.done + default: response.reasoning_summary_text.done + title: Type + type: string + required: + - text + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + type: object + OpenAIResponseObjectStreamResponseReasoningTextDelta: + description: Streaming event for incremental reasoning text updates. 
+ properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.reasoning_text.delta + default: response.reasoning_text.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + type: object + OpenAIResponseObjectStreamResponseReasoningTextDone: + description: Streaming event for when reasoning text is completed. + properties: + content_index: + title: Content Index + type: integer + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.reasoning_text.done + default: response.reasoning_text.done + title: Type + type: string + required: + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDone + type: object + OpenAIResponseObjectStreamResponseRefusalDelta: + description: Streaming event for incremental refusal text updates. + properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.refusal.delta + default: response.refusal.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDelta + type: object + OpenAIResponseObjectStreamResponseRefusalDone: + description: Streaming event for when refusal text is completed. + properties: + content_index: + title: Content Index + type: integer + refusal: + title: Refusal + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.refusal.done + default: response.refusal.done + title: Type + type: string + required: + - content_index + - refusal + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDone + type: object + OpenAIResponseObjectStreamResponseWebSearchCallCompleted: + description: Streaming event for completed web search calls. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.completed + default: response.web_search_call.completed + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.in_progress + default: response.web_search_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.searching + default: response.web_search_call.searching + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + type: object + OpenAIDeleteResponseObject: + properties: + id: + type: string + title: Id + object: + type: string + const: response + title: Object + default: response + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. + ListOpenAIResponseInputItem: + properties: + data: + items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Data + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + title: ListOpenAIResponseInputItem + description: List container for OpenAI response input items. + RunShieldRequest: + properties: + shield_id: + type: string + title: Shield Id + messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) + type: array + title: Messages + params: + additionalProperties: true + type: object + title: Params + type: object + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + properties: + violation: + anyOf: + - $ref: '#/components/schemas/SafetyViolation' + title: SafetyViolation + - type: 'null' + title: SafetyViolation + type: object + title: RunShieldResponse + description: Response from running a safety shield. + SafetyViolation: + properties: + violation_level: + $ref: '#/components/schemas/ViolationLevel' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - violation_level + title: SafetyViolation + description: Details of a safety violation detected by content moderation. + ViolationLevel: + type: string + enum: + - info + - warn + - error + title: ViolationLevel + description: Severity level of a safety violation. AggregationFunctionType: type: string enum: - - average - - weighted_average - - median - - categorical_count - - accuracy + - average + - weighted_average + - median + - categorical_count + - accuracy title: AggregationFunctionType - description: >- - Types of aggregation functions for scoring results. + description: Types of aggregation functions for scoring results. ArrayType: - type: object properties: type: type: string const: array + title: Type default: array - description: Discriminator type. Always "array" - additionalProperties: false - required: - - type + type: object title: ArrayType description: Parameter type for array values. 
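+    # Editorial illustration, not generator output: these parameter-type
+    # schemas carry nothing but their discriminator field, so an ArrayType
+    # instance (e.g. as a ScoringFn return_type) serializes as just:
+    #   {"type": "array"}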
BasicScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: basic + title: Type default: basic - description: >- - The type of scoring function parameters, always basic aggregation_functions: - type: array items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. - BooleanType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: properties: type: type: string const: boolean + title: Type default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type + type: object title: BooleanType description: Parameter type for boolean values. ChatCompletionInputType: - type: object properties: type: type: string const: chat_completion_input + title: Type default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: properties: type: type: string const: completion_input + title: Type default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type + type: object title: CompletionInputType description: Parameter type for completion input. JsonType: - type: object properties: type: type: string const: json + title: Type default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type + type: object title: JsonType description: Parameter type for JSON values. LLMAsJudgeScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: llm_as_judge + title: Type default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge judge_model: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring + title: Judge Model prompt_template: - type: string - description: >- - (Optional) Custom prompt template for the judge model + anyOf: + - type: string + - type: 'null' judge_score_regexes: - type: array items: type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
- NumberType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + required: + - judge_model + title: LLMAsJudgeScoringFnParams + description: Parameters for LLM-as-judge scoring function configuration. + NumberType: properties: type: type: string const: number + title: Type default: number - description: Discriminator type. Always "number" - additionalProperties: false - required: - - type + type: object title: NumberType description: Parameter type for numeric values. ObjectType: - type: object properties: type: type: string const: object + title: Type default: object - description: Discriminator type. Always "object" - additionalProperties: false - required: - - type + type: object title: ObjectType description: Parameter type for object values. - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' RegexParserScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: regex_parser + title: Type default: regex_parser - description: >- - The type of scoring function parameters, always regex_parser parsing_regexes: - type: array items: type: string - description: >- - Regex to extract the answer from generated response - aggregation_functions: type: array + title: Parsing Regexes + description: Regex to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - parsing_regexes - - aggregation_functions + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object title: RegexParserScoringFnParams - description: >- - Parameters for regex parser scoring function configuration. + description: Parameters for regex parser scoring function configuration. 
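+    # Editorial illustration, not generator output: a RegexParserScoringFnParams
+    # payload matching the schema above (the regex and aggregation values are
+    # hypothetical, shown only to make the shape concrete):
+    #   {"type": "regex_parser",
+    #    "parsing_regexes": ["Answer:\\s*(\\w+)"],
+    #    "aggregation_functions": ["accuracy"]}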
+ ScoringFn: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: scoring_function + title: Type + default: scoring_function + description: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this definition + return_type: + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + description: The return type of the deterministic function + discriminator: + propertyName: type + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval + type: object + required: + - identifier + - provider_id + - return_type + title: ScoringFn + description: A scoring function resource for evaluating model outputs. 
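+    # Editorial illustration, not generator output: a minimal ScoringFn
+    # resource with its three required fields plus a return_type drawn from
+    # the discriminated union above (identifier and provider_id are
+    # hypothetical values):
+    #   {"identifier": "basic::equality", "provider_id": "basic",
+    #    "type": "scoring_function", "return_type": {"type": "boolean"}}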
ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' discriminator: - propertyName: type mapping: + basic: '#/components/schemas/BasicScoringFnParams' llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams ScoringFnParamsType: - type: string + description: Types of scoring function parameter configurations. enum: - - llm_as_judge - - regex_parser - - basic + - llm_as_judge + - regex_parser + - basic title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. + type: string StringType: - type: object properties: type: type: string const: string + title: Type default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type + type: object title: StringType description: Parameter type for string values. UnionType: - type: object properties: type: type: string const: union + title: Type default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type + type: object title: UnionType description: Parameter type for union values. - RegisterScoringFunctionRequest: - type: object + ListScoringFunctionsResponse: properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false + data: + items: + $ref: '#/components/schemas/ScoringFn' + type: array + title: Data + type: object required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest - RegisterShieldRequest: - type: object + - data + title: ListScoringFunctionsResponse + ScoreRequest: properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. 
- additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest - Shield: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions type: object + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + properties: + results: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Results + type: object + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoringResult: + properties: + score_rows: + items: + additionalProperties: true + type: object + type: array + title: Score Rows + aggregated_results: + additionalProperties: true + type: object + title: Aggregated Results + type: object + required: + - score_rows + - aggregated_results + title: ScoringResult + description: A scoring result for a single row. + ScoreBatchRequest: + properties: + dataset_id: + type: string + title: Dataset Id + scoring_functions: + additionalProperties: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + save_results_dataset: + type: boolean + title: Save Results Dataset + default: false + type: object + required: + - dataset_id + - scoring_functions + title: ScoreBatchRequest + ScoreBatchResponse: + properties: + dataset_id: + anyOf: + - type: string + - type: 'null' + results: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Results + type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. 
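+    # Editorial illustration, not generator output: a ScoreBatchResponse with
+    # results keyed by scoring-function id (all values are hypothetical):
+    #   {"dataset_id": "my-eval-dataset",
+    #    "results": {"basic::equality": {"score_rows": [{"score": 1.0}],
+    #                "aggregated_results": {"accuracy": 1.0}}}}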
+  Shield:
     properties:
       identifier:
         type: string
+        title: Identifier
+        description: Unique identifier for this resource in llama stack
       provider_resource_id:
-      type: string
+        anyOf:
+        - type: string
+        - type: 'null'
+        description: Unique identifier for this resource in the provider
       provider_id:
         type: string
+        title: Provider Id
+        description: ID of the provider that owns this resource
       type:
         type: string
-      enum:
-      - model
-      - shield
-      - vector_store
-      - dataset
-      - scoring_function
-      - benchmark
-      - tool
-      - tool_group
-      - prompt
         const: shield
+        title: Type
         default: shield
-      description: The resource type, always shield
       params:
-      type: object
-      additionalProperties:
-        oneOf:
-        - type: 'null'
-        - type: boolean
-        - type: number
-        - type: string
-        - type: array
-        - type: object
-      description: >-
-        (Optional) Configuration parameters for the shield
-  additionalProperties: false
-  required:
-  - identifier
-  - provider_id
-  - type
-  title: Shield
-  description: >-
-    A safety shield resource that can be used to check content.
-  URL:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
     type: object
+    required:
+    - identifier
+    - provider_id
+    title: Shield
+    description: A safety shield resource that can be used to check content.
+  ListShieldsResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/components/schemas/Shield'
+        type: array
+        title: Data
+    type: object
+    required:
+    - data
+    title: ListShieldsResponse
+  InvokeToolRequest:
+    properties:
+      tool_name:
+        type: string
+        title: Tool Name
+      kwargs:
+        additionalProperties: true
+        type: object
+        title: Kwargs
+      authorization:
+        anyOf:
+        - type: string
+        - type: 'null'
+    type: object
+    required:
+    - tool_name
+    - kwargs
+    title: InvokeToolRequest
+  ImageContentItem:
+    description: An image content item
+    properties:
+      type:
+        const: image
+        default: image
+        title: Type
+        type: string
+      image:
+        $ref: '#/components/schemas/_URLOrData'
+    required:
+    - image
+    title: ImageContentItem
+    type: object
+  InterleavedContent:
+    anyOf:
+    - type: string
+    - discriminator:
+        mapping:
+          image: '#/components/schemas/ImageContentItem'
+          text: '#/components/schemas/TextContentItem'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/ImageContentItem'
+        title: ImageContentItem
+      - $ref: '#/components/schemas/TextContentItem'
+        title: TextContentItem
+      title: ImageContentItem | TextContentItem
+    - items:
+        discriminator:
+          mapping:
+            image: '#/components/schemas/ImageContentItem'
+            text: '#/components/schemas/TextContentItem'
+          propertyName: type
+        oneOf:
+        - $ref: '#/components/schemas/ImageContentItem'
+          title: ImageContentItem
+        - $ref: '#/components/schemas/TextContentItem'
+          title: TextContentItem
+        title: ImageContentItem | TextContentItem
+      type: array
+      title: list[ImageContentItem | TextContentItem]
+    title: string | list[ImageContentItem | TextContentItem]
+  InterleavedContentItem:
+    discriminator:
+      mapping:
+        image: '#/components/schemas/ImageContentItem'
+        text: '#/components/schemas/TextContentItem'
+      propertyName: type
+    oneOf:
+    - $ref: '#/components/schemas/ImageContentItem'
+      title: ImageContentItem
+    - $ref: '#/components/schemas/TextContentItem'
+      title: TextContentItem
+    title: ImageContentItem | TextContentItem
+  TextContentItem:
+    properties:
+      type:
+        type: string
+        const: text
+        title: Type
+        default: text
+      text:
+        type: string
+        title: Text
+    type: object
+    required:
+    - text
+    title: TextContentItem
+    description: A text content item
+  ToolInvocationResult:
+    properties:
+      content:
+        anyOf:
+        - type: string
+        - oneOf:
+          - $ref: '#/components/schemas/ImageContentItem-Output'
+            title: ImageContentItem-Output
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          discriminator:
+            propertyName: type
+            mapping:
+              image: '#/components/schemas/ImageContentItem-Output'
+              text: '#/components/schemas/TextContentItem'
+          title: ImageContentItem-Output | TextContentItem
+        - items:
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          type: array
+          title: list[ImageContentItem-Output | TextContentItem]
+        - type: 'null'
+        title: string | list[ImageContentItem-Output | TextContentItem]
+      error_message:
+        anyOf:
+        - type: string
+        - type: 'null'
+      error_code:
+        anyOf:
+        - type: integer
+        - type: 'null'
+      metadata:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+    type: object
+    title: ToolInvocationResult
+    description: Result of a tool invocation.
+  URL:
     properties:
       uri:
         type: string
+        title: Uri
-      description: The URL string pointing to the resource
-  additionalProperties: false
+    type: object
     required:
-  - uri
+    - uri
     title: URL
     description: A URL reference to external content.
-  RegisterToolGroupRequest:
-  type: object
+  ToolDef:
     properties:
       toolgroup_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      name:
         type: string
-      description: The ID of the tool group to register.
+        title: Name
+      description:
+        anyOf:
+        - type: string
+        - type: 'null'
+      input_schema:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+      output_schema:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+      metadata:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+    type: object
+    required:
+    - name
+    title: ToolDef
+    description: Tool definition used in runtime contexts.
+  ListToolDefsResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/components/schemas/ToolDef'
+        type: array
+        title: Data
+    type: object
+    required:
+    - data
+    title: ListToolDefsResponse
+    description: Response containing a list of tool definitions.
+  ToolGroup:
+    properties:
+      identifier:
+        type: string
+        title: Identifier
+        description: Unique identifier for this resource in llama stack
+      provider_resource_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+        description: Unique identifier for this resource in the provider
       provider_id:
         type: string
-      description: >-
-        The ID of the provider to use for the tool group.
+        title: Provider Id
+        description: ID of the provider that owns this resource
+      type:
+        type: string
+        const: tool_group
+        title: Type
+        default: tool_group
       mcp_endpoint:
-      $ref: '#/components/schemas/URL'
-      description: >-
-        The MCP endpoint to use for the tool group.
+        anyOf:
+        - $ref: '#/components/schemas/URL'
+          title: URL
+        - type: 'null'
+        title: URL
       args:
-      type: object
-      additionalProperties:
-        oneOf:
-        - type: 'null'
-        - type: boolean
-        - type: number
-        - type: string
-        - type: array
-        - type: object
-      description: >-
-        A dictionary of arguments to pass to the tool group.
-  additionalProperties: false
-  required:
-  - toolgroup_id
-  - provider_id
-  title: RegisterToolGroupRequest
-  DataSource:
-  oneOf:
-  - $ref: '#/components/schemas/URIDataSource'
-  - $ref: '#/components/schemas/RowsDataSource'
-  discriminator:
-    propertyName: type
-    mapping:
-      uri: '#/components/schemas/URIDataSource'
-      rows: '#/components/schemas/RowsDataSource'
-  RowsDataSource:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
     type: object
+    required:
+    - identifier
+    - provider_id
+    title: ToolGroup
+    description: A group of related tools managed together.
+  ListToolGroupsResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/components/schemas/ToolGroup'
+        type: array
+        title: Data
+    type: object
+    required:
+    - data
+    title: ListToolGroupsResponse
+    description: Response containing a list of tool groups.
+  Chunk:
+    description: A chunk of content that can be inserted into a vector database.
+    properties:
+      content:
+        anyOf:
+        - type: string
+        - discriminator:
+            mapping:
+              image: '#/components/schemas/ImageContentItem'
+              text: '#/components/schemas/TextContentItem'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/ImageContentItem'
+            title: ImageContentItem
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          title: ImageContentItem | TextContentItem
+        - items:
+            discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          type: array
+          title: list[ImageContentItem | TextContentItem]
+        title: string | list[ImageContentItem | TextContentItem]
+      chunk_id:
+        title: Chunk Id
+        type: string
+      metadata:
+        additionalProperties: true
+        title: Metadata
+        type: object
+      embedding:
+        anyOf:
+        - items:
+            type: number
+          type: array
+        - type: 'null'
+        nullable: true
+      chunk_metadata:
+        anyOf:
+        - $ref: '#/components/schemas/ChunkMetadata'
+          title: ChunkMetadata
+        - type: 'null'
+        nullable: true
+        title: ChunkMetadata
+    required:
+    - content
+    - chunk_id
+    title: Chunk
+    type: object
+  ChunkMetadata:
+    properties:
+      chunk_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      document_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      source:
+        anyOf:
+        - type: string
+        - type: 'null'
+      created_timestamp:
+        anyOf:
+        - type: integer
+        - type: 'null'
+      updated_timestamp:
+        anyOf:
+        - type: integer
+        - type: 'null'
+      chunk_window:
+        anyOf:
+        - type: string
+        - type: 'null'
+      chunk_tokenizer:
+        anyOf:
+        - type: string
+        - type: 'null'
+      chunk_embedding_model:
+        anyOf:
+        - type: string
+        - type: 'null'
+      chunk_embedding_dimension:
+        anyOf:
+        - type: integer
+        - type: 'null'
+      content_token_count:
+        anyOf:
+        - type: integer
+        - type: 'null'
+      metadata_token_count:
+        anyOf:
+        - type: integer
+        - type: 'null'
+    type: object
+    title: ChunkMetadata
+    description: |-
+      `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+      will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+      is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
+      Use `Chunk.metadata` for metadata that will be used in the context during inference.
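A minimal `Chunk` instance conforming to the schema above, for reference; only `content` and `chunk_id` are required, and the values here are invented:

```yaml
content: "The quick brown fox jumps over the lazy dog."
chunk_id: chunk-0001          # hypothetical identifier
metadata:
  document_id: doc-42         # hypothetical source document
embedding: null               # optional; may be filled in by the provider
```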
+ InsertChunksRequest: + properties: + vector_store_id: + type: string + title: Vector Store Id + chunks: + items: + $ref: '#/components/schemas/Chunk-Input' + type: array + title: Chunks + ttl_seconds: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - vector_store_id + - chunks + title: InsertChunksRequest + QueryChunksRequest: + properties: + vector_store_id: + type: string + title: Vector Store Id + query: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - vector_store_id + - query + title: QueryChunksRequest + QueryChunksResponse: + properties: + chunks: + items: + $ref: '#/components/schemas/Chunk-Output' + type: array + title: Chunks + scores: + items: + type: number + type: array + title: Scores + type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: + properties: + completed: + type: integer + title: Completed + cancelled: + type: integer + title: Cancelled + failed: + type: integer + title: Failed + in_progress: + type: integer + title: In Progress + total: + type: integer + title: Total + type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. + VectorStoreListResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreListResponse + description: Response from listing vector stores. 
+ VectorStoreObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store + created_at: + type: integer + title: Created At + name: + anyOf: + - type: string + - type: 'null' + usage_bytes: + type: integer + title: Usage Bytes + default: 0 + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + status: + type: string + title: Status + default: completed + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + last_active_at: + anyOf: + - type: integer + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - id + - created_at + - file_counts + title: VectorStoreObject + description: OpenAI Vector Store object. + VectorStoreChunkingStrategy: + discriminator: + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + VectorStoreChunkingStrategyAuto: + properties: + type: + type: string + const: auto + title: Type + default: auto + type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + properties: + type: + type: string + const: static + title: Type + default: static + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + properties: + chunk_overlap_tokens: + type: integer + title: Chunk Overlap Tokens + default: 400 + max_chunk_size_tokens: + type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens + default: 800 + type: object + title: VectorStoreChunkingStrategyStaticConfig + description: Configuration for static chunking strategy. + OpenAICreateVectorStoreRequestWithExtraBody: + properties: + name: + anyOf: + - type: string + - type: 'null' + file_ids: + anyOf: + - items: + type: string + type: array + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true + type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. 
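As a sketch of the request shape defined above, a hypothetical create-vector-store body that uses the static chunking strategy; the name and file id are illustrative:

```yaml
name: my-knowledge-base
file_ids:
- file-abc123                 # hypothetical file id
chunking_strategy:
  type: static
  static:
    chunk_overlap_tokens: 400
    max_chunk_size_tokens: 800
metadata:
  project: demo
```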
+ OpenaiUpdateVectorStoreRequest: + properties: + name: + anyOf: + - type: string + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: + properties: + file_ids: + items: + type: string + type: array + title: File Ids + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true + type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file_batch + created_at: + type: integer + title: Created At + vector_store_id: + type: string + title: Vector Store Id + status: + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + type: object + required: + - id + - created_at + - vector_store_id + - status + - file_counts + title: VectorStoreFileBatchObject + description: OpenAI Vector Store File Batch object. + VectorStoreFileStatus: + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + VectorStoreFileLastError: + properties: + code: + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error + message: + type: string + title: Message + type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. 
+ VectorStoreFileObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file + attributes: + additionalProperties: true + type: object + title: Attributes + chunking_strategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + created_at: + type: integer + title: Created At + last_error: + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError + status: + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + usage_bytes: + type: integer + title: Usage Bytes + default: 0 + vector_store_id: + type: string + title: Vector Store Id + type: object + required: + - id + - chunking_strategy + - created_at + - status + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. + VectorStoreFilesListInBatchResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreFilesListInBatchResponse + description: Response from listing files in a vector store file batch. + VectorStoreListFilesResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. 
+ OpenaiAttachFileToVectorStoreRequest: + properties: + file_id: + type: string + title: File Id + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + OpenaiUpdateVectorStoreFileRequest: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + type: object + required: + - attributes + title: OpenaiUpdateVectorStoreFileRequest + VectorStoreFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: + properties: + type: + type: string + const: text + title: Type + text: + type: string + title: Text + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. + VectorStoreFileContentResponse: + properties: + object: + type: string + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page + data: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. 
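Since this change makes the file-content endpoint return a `VectorStoreFileContentResponse`, an illustrative response page conforming to the schema above might look like this (the text is invented):

```yaml
object: vector_store.file_content.page
data:
- type: text
  text: "First parsed chunk of the uploaded file."
has_more: false
next_page: null
```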
+ OpenaiSearchVectorStoreRequest: + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + rewrite_query: + anyOf: + - type: boolean + - type: 'null' + default: false + search_mode: + anyOf: + - type: string + - type: 'null' + default: vector + type: object + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: + properties: + version: + type: string + title: Version + type: object + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + properties: + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
+ Dataset: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: properties: type: type: string const: rows + title: Type default: rows rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false + type: array + title: Rows + type: object required: - - type - - rows + - rows title: RowsDataSource description: A dataset stored in rows. URIDataSource: - type: object properties: type: type: string const: uri + title: Type default: uri uri: type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false + title: Uri + type: object required: - - type - - uri + - uri title: URIDataSource - description: >- - A dataset that can be obtained from a URI. - RegisterDatasetRequest: - type: object + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. 
Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest - Dataset: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: + const: benchmark + title: Type + default: benchmark + dataset_id: type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
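The two `source` variants accepted by `Dataset` can be illustrated with minimal instances; the URI and messages mirror the examples from the removed inline descriptions above:

```yaml
# URI-backed source
type: uri
uri: https://mywebsite.com/mydata.jsonl
```

```yaml
# Inline rows source
type: rows
rows:
- messages:
  - role: user
    content: "Hello, world!"
  - role: assistant
    content: "Hello, world!"
```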
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy discriminator: propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. 
- RegisterBenchmarkRequest: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' type: object + title: SamplingParams + description: Sampling parameters. + SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: + properties: + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. 
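For reference, a small `SamplingParams` instance using the top-p strategy defined above; note that `temperature` is required by `TopPSamplingStrategy`, and the values are illustrative:

```yaml
strategy:
  type: top_p
  temperature: 0.7
  top_p: 0.95
max_tokens: 512
```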
+ RerankRequest: + properties: + model: + type: string + title: Model + query: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + type: array + title: Items + max_num_results: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - model + - query + - items + title: RerankRequest + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + properties: + identifier: + type: string + title: Identifier + created_at: + type: string + format: date-time + title: Created At + epoch: + type: integer + title: Epoch + post_training_job_id: + type: string + title: Post Training Job Id + path: + type: string + title: Path + training_metrics: + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + properties: + job_uuid: + type: string + title: Job Uuid + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + properties: + epoch: + type: integer + title: Epoch + train_loss: + type: number + title: Train Loss + validation_loss: + type: number + title: Validation Loss + perplexity: + type: number + title: Perplexity + type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. 
+ CancelTrainingJobRequest: + properties: + job_uuid: + type: string + title: Job Uuid + type: object + required: + - job_uuid + title: CancelTrainingJobRequest + PostTrainingJobStatusResponse: + properties: + job_uuid: + type: string + title: Job Uuid + status: + $ref: '#/components/schemas/JobStatus' + scheduled_at: + anyOf: + - type: string + format: date-time + - type: 'null' + started_at: + anyOf: + - type: string + format: date-time + - type: 'null' + completed_at: + anyOf: + - type: string + format: date-time + - type: 'null' + resources_allocated: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + - status + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + properties: + data: + items: + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object + required: + - data + title: ListPostTrainingJobsResponse + DPOAlignmentConfig: + properties: + beta: + type: number + title: Beta + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + type: object + required: + - beta + title: DPOAlignmentConfig + description: Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + properties: + dataset_id: + type: string + title: Dataset Id + batch_size: + type: integer + title: Batch Size + shuffle: + type: boolean + title: Shuffle + data_format: + $ref: '#/components/schemas/DatasetFormat' + validation_dataset_id: + anyOf: + - type: string + - type: 'null' + packed: + anyOf: + - type: boolean + - type: 'null' + default: false + train_on_input: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. + EfficiencyConfig: + properties: + enable_activation_checkpointing: + anyOf: + - type: boolean + - type: 'null' + default: false + enable_activation_offloading: + anyOf: + - type: boolean + - type: 'null' + default: false + memory_efficient_fsdp_wrap: + anyOf: + - type: boolean + - type: 'null' + default: false + fsdp_cpu_offload: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + lr: + type: number + title: Lr + weight_decay: + type: number + title: Weight Decay + num_warmup_steps: + type: integer + title: Num Warmup Steps + type: object + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: Available optimizer algorithms for training. 
+ TrainingConfig: + properties: + n_epochs: + type: integer + title: N Epochs + max_steps_per_epoch: + type: integer + title: Max Steps Per Epoch + default: 1 + gradient_accumulation_steps: + type: integer + title: Gradient Accumulation Steps + default: 1 + max_validation_steps: + anyOf: + - type: integer + - type: 'null' + default: 1 + data_config: + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig + optimizer_config: + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig + efficiency_config: + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig + dtype: + anyOf: + - type: string + - type: 'null' + default: bf16 + type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + properties: + job_uuid: + type: string + title: Job Uuid + finetuned_model: + type: string + title: Finetuned Model + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + type: object + title: Hyperparam Search Config + logger_config: + additionalProperties: true + type: object + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + properties: + job_uuid: + type: string + title: Job Uuid + type: object + required: + - job_uuid + title: PostTrainingJob + AlgorithmConfig: + discriminator: + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig + LoraFinetuningConfig: + properties: + type: + type: string + const: LoRA + title: Type + default: LoRA + lora_attn_modules: + items: + type: string + type: array + title: Lora Attn Modules + apply_lora_to_mlp: + type: boolean + title: Apply Lora To Mlp + apply_lora_to_output: + type: boolean + title: Apply Lora To Output + rank: + type: integer + title: Rank + alpha: + type: integer + title: Alpha + use_dora: + anyOf: + - type: boolean + - type: 'null' + default: false + quantize_base: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + properties: + type: + type: string + const: QAT + title: Type + default: QAT + quantizer_name: + type: string + title: Quantizer Name + group_size: + type: integer + title: Group Size + type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
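A hypothetical `LoraFinetuningConfig` covering the required fields above; the attention-module names are common LoRA targets, not values mandated by this spec:

```yaml
type: LoRA
lora_attn_modules:
- q_proj
- v_proj
apply_lora_to_mlp: false
apply_lora_to_output: false
rank: 8
alpha: 16
```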
+  SupervisedFineTuneRequest:
+    properties:
+      job_uuid:
+        type: string
+        title: Job Uuid
+      training_config:
+        $ref: '#/components/schemas/TrainingConfig'
+      hyperparam_search_config:
+        additionalProperties: true
+        type: object
+        title: Hyperparam Search Config
+      logger_config:
+        additionalProperties: true
+        type: object
+        title: Logger Config
+      model:
+        anyOf:
+        - type: string
+        - type: 'null'
+        description: Model descriptor for training if not in provider config
+      checkpoint_dir:
+        anyOf:
+        - type: string
+        - type: 'null'
+      algorithm_config:
+        anyOf:
+        - oneOf:
+          - $ref: '#/components/schemas/LoraFinetuningConfig'
+            title: LoraFinetuningConfig
+          - $ref: '#/components/schemas/QATFinetuningConfig'
+            title: QATFinetuningConfig
+          discriminator:
+            propertyName: type
+            mapping:
+              LoRA: '#/components/schemas/LoraFinetuningConfig'
+              QAT: '#/components/schemas/QATFinetuningConfig'
+          title: LoraFinetuningConfig | QATFinetuningConfig
+        - type: 'null'
+        title: Algorithm Config
+    type: object
+    required:
+    - job_uuid
+    - training_config
+    - hyperparam_search_config
+    - logger_config
+    title: SupervisedFineTuneRequest
+  RegisterModelRequest:
+    properties:
+      model_id:
+        type: string
+        title: Model Id
+      provider_model_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      provider_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      metadata:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+      model_type:
+        anyOf:
+        - $ref: '#/components/schemas/ModelType'
+          title: ModelType
+        - type: 'null'
+        title: ModelType
+    type: object
+    required:
+    - model_id
+    title: RegisterModelRequest
+  ParamType:
+    discriminator:
+      mapping:
+        array: '#/components/schemas/ArrayType'
+        boolean: '#/components/schemas/BooleanType'
+        chat_completion_input: '#/components/schemas/ChatCompletionInputType'
+        completion_input: '#/components/schemas/CompletionInputType'
+        json: '#/components/schemas/JsonType'
+        number: '#/components/schemas/NumberType'
+        object: '#/components/schemas/ObjectType'
+        string: '#/components/schemas/StringType'
+        union: '#/components/schemas/UnionType'
+      propertyName: type
+    oneOf:
+    - $ref: '#/components/schemas/StringType'
+      title: StringType
+    - $ref: '#/components/schemas/NumberType'
+      title: NumberType
+    - $ref: '#/components/schemas/BooleanType'
+      title: BooleanType
+    - $ref: '#/components/schemas/ArrayType'
+      title: ArrayType
+    - $ref: '#/components/schemas/ObjectType'
+      title: ObjectType
+    - $ref: '#/components/schemas/JsonType'
+      title: JsonType
+    - $ref: '#/components/schemas/UnionType'
+      title: UnionType
+    - $ref: '#/components/schemas/ChatCompletionInputType'
+      title: ChatCompletionInputType
+    - $ref: '#/components/schemas/CompletionInputType'
+      title: CompletionInputType
+    title: StringType | ... (9 variants)
+  RegisterShieldRequest:
+    properties:
+      shield_id:
+        type: string
+        title: Shield Id
+      provider_shield_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      provider_id:
+        anyOf:
+        - type: string
+        - type: 'null'
+      params:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+    type: object
+    required:
+    - shield_id
+    title: RegisterShieldRequest
+  RegisterToolGroupRequest:
+    properties:
+      toolgroup_id:
+        type: string
+        title: Toolgroup Id
+      provider_id:
+        type: string
+        title: Provider Id
+      mcp_endpoint:
+        anyOf:
+        - $ref: '#/components/schemas/URL'
+          title: URL
+        - type: 'null'
+        title: URL
+      args:
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+    type: object
+    required:
+    - toolgroup_id
+    - provider_id
+    title: RegisterToolGroupRequest
+  DataSource:
+    discriminator:
+      mapping:
+        rows: '#/components/schemas/RowsDataSource'
+        uri: '#/components/schemas/URIDataSource'
+      propertyName: type
+    oneOf:
+    - $ref: '#/components/schemas/URIDataSource'
+      title: URIDataSource
+    - $ref: '#/components/schemas/RowsDataSource'
+      title: RowsDataSource
+    title: URIDataSource | RowsDataSource
+  RegisterBenchmarkRequest:
     properties:
       benchmark_id:
         type: string
-      description: The ID of the benchmark to register.
+        title: Benchmark Id
       dataset_id:
         type: string
-      description: >-
-        The ID of the dataset to use for the benchmark.
+        title: Dataset Id
       scoring_functions:
-      type: array
         items:
           type: string
-      description: >-
-        The scoring functions to use for the benchmark.
+        type: array
+        title: Scoring Functions
       provider_benchmark_id:
-      type: string
-      description: >-
-        The ID of the provider benchmark to use for the benchmark.
+        anyOf:
+        - type: string
+        - type: 'null'
       provider_id:
-      type: string
-      description: >-
-        The ID of the provider to use for the benchmark.
+        anyOf:
+        - type: string
+        - type: 'null'
       metadata:
-      type: object
-      additionalProperties:
-        oneOf:
-        - type: 'null'
-        - type: boolean
-        - type: number
-        - type: string
-        - type: array
-        - type: object
-      description: The metadata to use for the benchmark.
-  additionalProperties: false
+        anyOf:
+        - additionalProperties: true
+          type: object
+        - type: 'null'
+    type: object
     required:
     - benchmark_id
     - dataset_id
     - scoring_functions
     title: RegisterBenchmarkRequest
+  AllowedToolsFilter:
+    properties:
+      tool_names:
+        anyOf:
+        - items:
+            type: string
+          type: array
+        - type: 'null'
+    type: object
+    title: AllowedToolsFilter
+    description: Filter configuration for restricting which MCP tools can be used.
+  ApprovalFilter:
+    properties:
+      always:
+        anyOf:
+        - items:
+            type: string
+          type: array
+        - type: 'null'
+      never:
+        anyOf:
+        - items:
+            type: string
+          type: array
+        - type: 'null'
+    type: object
+    title: ApprovalFilter
+    description: Filter configuration for MCP tool approval requirements.
+ BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
+  Chunk-Output:
+    properties:
+      content:
+        anyOf:
+        - type: string
+        - oneOf:
+          - $ref: '#/components/schemas/ImageContentItem-Output'
+            title: ImageContentItem-Output
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          discriminator:
+            propertyName: type
+            mapping:
+              image: '#/components/schemas/ImageContentItem-Output'
+              text: '#/components/schemas/TextContentItem'
+          title: ImageContentItem-Output | TextContentItem
+        - items:
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          type: array
+          title: list[ImageContentItem-Output | TextContentItem]
+        title: string | list[ImageContentItem-Output | TextContentItem]
+      chunk_id:
+        type: string
+        title: Chunk Id
+      metadata:
+        additionalProperties: true
+        type: object
+        title: Metadata
+      embedding:
+        anyOf:
+        - items:
+            type: number
+          type: array
+        - type: 'null'
+      chunk_metadata:
+        anyOf:
+        - $ref: '#/components/schemas/ChunkMetadata'
+          title: ChunkMetadata
+        - type: 'null'
+        title: ChunkMetadata
+    type: object
+    required:
+    - content
+    - chunk_id
+    title: Chunk
+    description: A chunk of content that can be inserted into a vector database.
+  ConversationItemInclude:
+    type: string
+    enum:
+    - web_search_call.action.sources
+    - code_interpreter_call.outputs
+    - computer_call_output.output.image_url
+    - file_search_call.results
+    - message.input_image.image_url
+    - message.output_text.logprobs
+    - reasoning.encrypted_content
+    title: ConversationItemInclude
+    description: Specify additional output data to include in the model response.
+  DatasetPurpose:
+    type: string
+    enum:
+    - post-training/messages
+    - eval/question-answer
+    - eval/messages-answer
+    title: DatasetPurpose
+    description: Purpose of the dataset. Each purpose has a required input data schema.
+  Errors:
+    properties:
+      data:
+        anyOf:
+        - items:
+            $ref: '#/components/schemas/BatchError'
+          type: array
+        - type: 'null'
+      object:
+        anyOf:
+        - type: string
+        - type: 'null'
+    additionalProperties: true
+    type: object
+    title: Errors
+  HealthStatus:
+    type: string
+    enum:
+    - OK
+    - Error
+    - Not Implemented
+    title: HealthStatus
+  ImageContentItem-Input:
+    properties:
+      type:
+        type: string
+        const: image
+        title: Type
+        default: image
+      image:
+        $ref: '#/components/schemas/_URLOrData'
+    type: object
+    required:
+    - image
+    title: ImageContentItem
+    description: An image content item
+  ImageContentItem-Output:
+    properties:
+      type:
+        type: string
+        const: image
+        title: Type
+        default: image
+      image:
+        $ref: '#/components/schemas/_URLOrData'
+    type: object
+    required:
+    - image
+    title: ImageContentItem
+    description: An image content item
+  InputTokensDetails:
+    properties:
+      cached_tokens:
+        type: integer
+        title: Cached Tokens
+    additionalProperties: true
+    type: object
+    required:
+    - cached_tokens
+    title: InputTokensDetails
+  JobStatus:
+    type: string
+    enum:
+    - completed
+    - in_progress
+    - failed
+    - scheduled
+    - cancelled
+    title: JobStatus
+    description: Status of a job execution.
+ MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. 
+ OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + RegisterDatasetRequestLoose: + properties: + purpose: + title: Purpose + source: + title: Source + metadata: + title: Metadata + dataset_id: + title: Dataset Id + type: object + required: + - purpose + - source + title: RegisterDatasetRequestLoose + RegisterScoringFunctionRequestLoose: + properties: + scoring_fn_id: + title: Scoring Fn Id + description: + title: Description + return_type: + title: Return Type + provider_scoring_fn_id: + title: Provider Scoring Fn Id + provider_id: + title: Provider Id + params: + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequestLoose + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. 
+ properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + SpanEndPayload: + description: Payload for a span end event. + properties: + type: + const: span_end + default: span_end + title: Type + type: string + status: + $ref: '#/components/schemas/SpanStatus' + required: + - status + title: SpanEndPayload + type: object + SpanStartPayload: + description: Payload for a span start event. + properties: + type: + const: span_start + default: span_start + title: Type + type: string + name: + title: Name + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - name + title: SpanStartPayload + type: object + SpanStatus: + description: The status of a span indicating whether it completed successfully or with an error. + enum: + - ok + - error + title: SpanStatus + type: string + StructuredLogPayload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + LogSeverity: + description: The severity level of a log message. + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + type: string + MetricEvent: + description: A metric event containing a measured value. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... 
(4 variants) + type: object + - type: 'null' + type: + const: metric + default: metric + title: Type + type: string + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + title: Unit + type: string + required: + - trace_id + - span_id + - timestamp + - metric + - value + - unit + title: MetricEvent + type: object + StructuredLogEvent: + description: A structured log event containing typed payload data. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: structured_log + default: structured_log + title: Type + type: string + payload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + required: + - trace_id + - span_id + - timestamp + - payload + title: StructuredLogEvent + type: object + UnstructuredLogEvent: + description: An unstructured log event containing a simple text message. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: unstructured_log + default: unstructured_log + title: Type + type: string + message: + title: Message + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + required: + - trace_id + - span_id + - timestamp + - message + - severity + title: UnstructuredLogEvent + type: object + Event: + discriminator: + mapping: + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + propertyName: type + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + title: UnstructuredLogEvent + - $ref: '#/components/schemas/MetricEvent' + title: MetricEvent + - $ref: '#/components/schemas/StructuredLogEvent' + title: StructuredLogEvent + title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. 
+ properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. 
+ properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. 
+ properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: 
OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. + enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. 
+ title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. + properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object + Span: + description: A span representing a single operation within a trace. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + required: + - span_id + - trace_id + - name + - start_time + title: Span + type: object + Trace: + description: A trace representing the complete execution path of a request across multiple operations. 
+ properties: + trace_id: + title: Trace Id + type: string + root_span_id: + title: Root Span Id + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + required: + - trace_id + - root_span_id + - start_time + title: Trace + type: object + EventType: + description: The type of telemetry event being logged. + enum: + - unstructured_log + - structured_log + - metric + title: EventType + type: string + StructuredLogType: + description: The type of structured log event payload. + enum: + - span_start + - span_end + title: StructuredLogType + type: string + EvalTrace: + description: A trace record for evaluation purposes. + properties: + session_id: + title: Session Id + type: string + step: + title: Step + type: string + input: + title: Input + type: string + output: + title: Output + type: string + expected_output: + title: Expected Output + type: string + required: + - session_id + - step + - input + - output + - expected_output + title: EvalTrace + type: object + SpanWithStatus: + description: A span that includes status information. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/SpanStatus' + title: SpanStatus + - type: 'null' + nullable: true + title: SpanStatus + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + type: object + QueryConditionOp: + description: Comparison operators for query conditions. + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + type: string + QueryCondition: + description: A condition for filtering query results. + properties: + key: + title: Key + type: string + op: + $ref: '#/components/schemas/QueryConditionOp' + value: + title: Value + required: + - key + - op + - value + title: QueryCondition + type: object + MetricLabel: + description: A label associated with a metric. + properties: + name: + title: Name + type: string + value: + title: Value + type: string + required: + - name + - value + title: MetricLabel + type: object + MetricDataPoint: + description: A single data point in a metric time series. + properties: + timestamp: + title: Timestamp + type: integer + value: + title: Value + type: number + unit: + title: Unit + type: string + required: + - timestamp + - value + - unit + title: MetricDataPoint + type: object + MetricSeries: + description: A time series of metric data points. 
+ properties: + metric: + title: Metric + type: string + labels: + items: + $ref: '#/components/schemas/MetricLabel' + title: Labels + type: array + values: + items: + $ref: '#/components/schemas/MetricDataPoint' + title: Values + type: array + required: + - metric + - labels + - values + title: MetricSeries + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -1127,8 +10232,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -1136,11 +10240,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -1148,39 +10250,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Benchmarks - description: '' - - name: Datasets - description: '' - - name: Models - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: ToolGroups - description: '' +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. 
+ name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. + name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Benchmarks - - Datasets - - Models - - ScoringFunctions - - Shields - - ToolGroups +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 6f379d17c..2b36ebf47 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1,53 +1,53 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before - becoming stable. + **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before + becoming stable. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. 
- required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -59,55 +59,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. - description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. - required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -118,51 +116,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. - parameters: [] - deprecated: false - post: - responses: - '200': - description: A Dataset. - content: - application/json: - schema: - $ref: '#/components/schemas/Dataset' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Register a new dataset. - description: Register a new dataset. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterDatasetRequest' - required: true - deprecated: true + operationId: list_datasets_v1beta_datasets_get /v1beta/datasets/{dataset_id}: get: responses: @@ -173,53 +142,29 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Unregister a dataset by its ID. - description: Unregister a dataset by its ID. - parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string - deprecated: true + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1alpha/eval/benchmarks: get: responses: @@ -230,47 +175,22 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. - parameters: [] - deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Register a benchmark. - description: Register a benchmark. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterBenchmarkRequest' - required: true - deprecated: true + operationId: list_benchmarks_v1alpha_eval_benchmarks_get /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -281,131 +201,107 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Unregister a benchmark. - description: Unregister a benchmark. - parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string - deprecated: true + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. 
content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. + - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: - $ref: '#/components/schemas/RunEvalRequest' + $ref: '#/components/schemas/BenchmarkConfig' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -416,67 +312,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. 
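A sketch of the job lifecycle covered by the run/status endpoints above (and the result endpoint just below), assuming a local server on port 8321 and placeholder benchmark, model, and job ids; the request body follows the `BenchmarkConfig` schema that now replaces `RunEvalRequest`:

```bash
# Start an evaluation job. eval_candidate and scoring_params are the two
# required BenchmarkConfig fields; greedy is the simplest sampling strategy.
curl -X POST "http://localhost:8321/v1alpha/eval/benchmarks/my-benchmark/jobs" \
  -H "Content-Type: application/json" \
  -d '{
        "eval_candidate": {
          "type": "model",
          "model": "my-model",
          "sampling_params": {"strategy": {"type": "greedy"}}
        },
        "scoring_params": {}
      }'

# Poll the returned Job (id "job-123" is a placeholder) until its status
# leaves "in_progress", then fetch the EvaluateResponse.
curl "http://localhost:8321/v1alpha/eval/benchmarks/my-benchmark/jobs/job-123"
curl "http://localhost:8321/v1alpha/eval/benchmarks/my-benchmark/jobs/job-123/result"
```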
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -487,68 +385,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. 
+ operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -560,54 +457,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. - parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -619,27 +518,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. 
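For the rerank endpoint above, a minimal sketch assuming a local server on port 8321; the model id, query, and documents are placeholders, and the body follows the `RerankRequest` schema:

```bash
# Rank three candidate documents against a query. max_num_results is
# optional; omitting it returns scores for every item.
curl -X POST "http://localhost:8321/v1alpha/inference/rerank" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "my-reranker",
        "query": "What is the capital of France?",
        "items": ["Paris is the capital of France.",
                  "Berlin is the capital of Germany.",
                  "France is in Europe."],
        "max_num_results": 2
      }'

# The RerankResponse lists RerankData entries, each with the original
# "index" of the document and a "relevance_score" (higher is more relevant).
```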
- required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -650,21 +550,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -675,27 +576,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. - parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -706,1554 +608,8603 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. 
properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - AppendRowsRequest: - type: object - properties: - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. - additionalProperties: false - required: - - rows - title: AppendRowsRequest - PaginatedResponse: type: object + ListBatchesResponse: properties: + object: + type: string + const: list + title: Object + default: list data: - type: array items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list has_more: type: boolean - description: >- - Whether there are more items available after this set - url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Dataset: + title: Has More + description: Whether there are more batches available + default: false type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. 
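The `Batch` status enum above implies a polling loop; a sketch under the assumption that batches are served at the OpenAI-compatible `/v1/batches/{batch_id}` path and that completed/failed/expired/cancelled are the terminal states (neither the path nor that convention is part of this hunk):

```bash
# Poll a batch (id "batch-abc" is a placeholder) until it reaches one of
# the assumed terminal statuses from the enum above.
while true; do
  status=$(curl -s "http://localhost:8321/v1/batches/batch-abc" | jq -r '.status')
  case "$status" in
    completed|failed|expired|cancelled) echo "done: $status"; break ;;
    *) echo "still $status"; sleep 5 ;;
  esac
done
```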
- RowsDataSource: - type: object + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. + Batch: properties: - type: + id: type: string - const: rows - default: rows - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false - required: - - type - - rows - title: RowsDataSource - description: A dataset stored in rows. - URIDataSource: - type: object - properties: - type: + title: Id + completion_window: type: string - const: uri - default: uri - uri: - type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. - ListDatasetsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false - required: - - data - title: ListDatasetsResponse - description: Response from listing datasets. - Benchmark: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: benchmark - default: benchmark - description: The resource type, always benchmark - dataset_id: - type: string - description: >- - Identifier of the dataset to use for the benchmark evaluation - scoring_functions: - type: array - items: - type: string - description: >- - List of scoring function identifiers to apply during evaluation - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Metadata for this evaluation task - additionalProperties: false - required: - - identifier - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - description: >- - A benchmark resource for evaluating model performance. - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - AggregationFunctionType: - type: string - enum: - - average - - weighted_average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: >- - Types of aggregation functions for scoring results. - BasicScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: basic - default: basic - description: >- - The type of scoring function parameters, always basic - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. 
- BenchmarkConfig: - type: object - properties: - eval_candidate: - $ref: '#/components/schemas/ModelCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: + title: Completion Window + created_at: type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. - GreedySamplingStrategy: - type: object - properties: - type: + title: Created At + endpoint: type: string - const: greedy - default: greedy - description: >- - Must be "greedy" to identify this sampling strategy - additionalProperties: false - required: - - type - title: GreedySamplingStrategy - description: >- - Greedy sampling strategy that selects the highest probability token at each - step. - ImageContentItem: - type: object - properties: - type: + title: Endpoint + input_file_id: type: string - const: image - default: image - description: >- - Discriminator type of the content item. Always "image" - image: - type: object - properties: - url: - $ref: '#/components/schemas/URL' - description: >- - A URL of the image or data URL in the format of data:image/{type};base64,{data}. - Note that URL could have length limits. - data: - type: string - contentEncoding: base64 - description: base64 encoded image data as string - additionalProperties: false - description: >- - Image as a base64 encoded string or an URL - additionalProperties: false - required: - - type - - image - title: ImageContentItem - description: A image content item - InterleavedContent: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - InterleavedContentItem: - oneOf: - - $ref: '#/components/schemas/ImageContentItem' - - $ref: '#/components/schemas/TextContentItem' - discriminator: - propertyName: type - mapping: - image: '#/components/schemas/ImageContentItem' - text: '#/components/schemas/TextContentItem' - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: llm_as_judge - default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge - judge_model: + title: Input File Id + object: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring - prompt_template: - type: string - description: >- - (Optional) Custom prompt template for the judge model - judge_score_regexes: - type: array - items: - type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
- ModelCandidate: - type: object - properties: - type: - type: string - const: model - default: model - model: - type: string - description: The model ID to evaluate. - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. - system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - description: A model candidate for evaluation. - RegexParserScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: regex_parser - default: regex_parser - description: >- - The type of scoring function parameters, always regex_parser - parsing_regexes: - type: array - items: - type: string - description: >- - Regex to extract the answer from generated response - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - parsing_regexes - - aggregation_functions - title: RegexParserScoringFnParams - description: >- - Parameters for regex parser scoring function configuration. - SamplingParams: - type: object - properties: - strategy: - oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' - discriminator: - propertyName: type - mapping: - greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' - top_k: '#/components/schemas/TopKSamplingStrategy' - description: The sampling strategy. - max_tokens: - type: integer - description: >- - The maximum number of tokens that can be generated in the completion. - The token count of your prompt plus max_tokens cannot exceed the model's - context length. - repetition_penalty: - type: number - default: 1.0 - description: >- - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's likelihood - to talk about new topics. - stop: - type: array - items: - type: string - description: >- - Up to 4 sequences where the API will stop generating further tokens. The - returned text will not contain the stop sequence. - additionalProperties: false - required: - - strategy - title: SamplingParams - description: Sampling parameters. - ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' - ScoringFnParamsType: - type: string - enum: - - llm_as_judge - - regex_parser - - basic - title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. - SystemMessage: - type: object - properties: - role: - type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". 
If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. - TextContentItem: - type: object - properties: - type: - type: string - const: text - default: text - description: >- - Discriminator type of the content item. Always "text" - text: - type: string - description: Text content - additionalProperties: false - required: - - type - - text - title: TextContentItem - description: A text content item - TopKSamplingStrategy: - type: object - properties: - type: - type: string - const: top_k - default: top_k - description: >- - Must be "top_k" to identify this sampling strategy - top_k: - type: integer - description: >- - Number of top tokens to consider for sampling. Must be at least 1 - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - description: >- - Top-k sampling strategy that restricts sampling to the k most likely tokens. - TopPSamplingStrategy: - type: object - properties: - type: - type: string - const: top_p - default: top_p - description: >- - Must be "top_p" to identify this sampling strategy - temperature: - type: number - description: >- - Controls randomness in sampling. Higher values increase randomness - top_p: - type: number - default: 0.95 - description: >- - Cumulative probability threshold for nucleus sampling. Defaults to 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - description: >- - Top-p (nucleus) sampling strategy that samples from the smallest set of tokens - with cumulative probability >= p. - URL: - type: object - properties: - uri: - type: string - description: The URL string pointing to the resource - additionalProperties: false - required: - - uri - title: URL - description: A URL reference to external content. - EvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the evaluation. - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - input_rows - - scoring_functions - - benchmark_config - title: EvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - description: The response from an evaluation. 
- ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - description: A scoring result for a single row. - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest - Job: - type: object - properties: - job_id: - type: string - description: Unique identifier for the job + const: batch + title: Object status: type: string enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - "OpenAIChatCompletionContentPartImageParam": + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. 
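`ListOpenAIChatCompletionResponse` above is a cursor-style page (`first_id`, `last_id`, `has_more`); a sketch of walking it with `jq`, assuming stored completions are listed at `/v1/chat/completions` with `limit`/`after` query parameters (the path and its parameters are outside this hunk):

```bash
# Walk all stored chat completions page by page: cursor on the last_id of
# the previous page and stop when has_more is false.
after=""
while :; do
  page=$(curl -s "http://localhost:8321/v1/chat/completions?limit=20${after:+&after=$after}")
  echo "$page" | jq -r '.data[].id'
  [ "$(echo "$page" | jq -r '.has_more')" = "true" ] || break
  after=$(echo "$page" | jq -r '.last_id')
done
```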
+ OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + properties: + role: + const: assistant + default: assistant + title: Role + type: string + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + title: OpenAIAssistantMessageParam + type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIImageURL: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: + properties: + index: + anyOf: + - type: integer + - type: 'null' + id: + anyOf: + - type: string + - type: 'null' + type: + type: string + const: function + title: Type + default: function + function: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction + type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. 
+ OpenAIChatCompletionToolCallFunction: + properties: + name: + anyOf: + - type: string + - type: 'null' + arguments: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + completion_tokens: + type: integer + title: Completion Tokens + total_tokens: + type: integer + title: Total Tokens + prompt_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails + completion_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails + type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: + properties: + message: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + finish_reason: + type: string + title: Finish Reason + index: + type: integer + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: + properties: + content: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + refusal: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. 
+ OpenAIDeveloperMessageParam: + properties: + role: + type: string + const: developer + title: Role + default: developer + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: + properties: + type: + type: string + const: file + title: Type + default: file + file: + $ref: '#/components/schemas/OpenAIFileFile' + type: object + required: + - file + title: OpenAIFile + OpenAIFileFile: + properties: + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIFileFile + OpenAIImageURL: properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - RerankRequest: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. + OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. 
+ OpenAITokenLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + top_logprobs: + items: + $ref: '#/components/schemas/OpenAITopLogProb' + type: array + title: Top Logprobs + type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: + properties: + role: + type: string + const: tool + title: Role + default: tool + tool_call_id: + type: string + title: Tool Call Id + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + title: OpenAIUserMessageParam + type: object + OpenAIJSONSchema: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. 
+ OpenAIResponseFormatJSONObject: + properties: + type: + type: string + const: json_object + title: Type + default: json_object + type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: + properties: + type: + type: string + const: json_schema + title: Type + default: json_schema + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: + properties: + type: + type: string + const: text + title: Type + default: text + type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the reranking model to use. - query: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: - type: array + title: Model + messages: items: oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. - max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. 
- additionalProperties: false - required: - - model - - query - - items - title: RerankRequest - RerankData: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) + type: array + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + top_logprobs: + anyOf: + - type: integer + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
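`OpenAIChatCompletionRequestWithExtraBody` above requires only `model` and `messages`; a minimal sketch, assuming the OpenAI-compatible chat-completions path (not part of this hunk) and a placeholder model id:

```bash
# Smallest valid request body: a model id plus one user message.
# Everything else (temperature, tools, response_format, ...) is optional,
# and additionalProperties: true lets providers accept extra body fields.
curl -X POST "http://localhost:8321/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "my-model",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```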
+ OpenAIChatCompletion: properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. + properties: + id: + title: Id + type: string + choices: + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + title: Choices + type: array + object: + const: chat.completion.chunk + default: chat.completion.chunk + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model + type: string + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage + required: + - id + - choices + - created + - model + title: OpenAIChatCompletionChunk + type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. + properties: + content: + anyOf: + - type: string + - type: 'null' + nullable: true + refusal: + anyOf: + - type: string + - type: 'null' + nullable: true + role: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + reasoning_content: + anyOf: + - type: string + - type: 'null' + nullable: true + title: OpenAIChoiceDelta + type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. 
+ properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + finish_reason: + title: Finish Reason + type: string + index: + title: Index + type: integer + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs + required: + - delta + - finish_reason + - index + title: OpenAIChunkChoice + type: object + OpenAICompletionWithInputMessages: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + input_messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object + required: + - id + - choices + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + OpenAICompletionRequestWithExtraBody: + properties: + model: + type: string + title: Model + prompt: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: + items: + type: integer + type: array + type: array + title: list[array] + title: string | ... 
(4 variants) + best_of: + anyOf: + - type: integer + - type: 'null' + echo: + anyOf: + - type: boolean + - type: 'null' + frequency_penalty: + anyOf: + - type: number + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + suffix: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices + created: + type: integer + title: Created + model: + type: string + title: Model + object: + type: string + const: text_completion + title: Object + default: text_completion + type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. + + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: + properties: + finish_reason: + type: string + title: Finish Reason + text: + type: string + title: Text index: type: integer - description: >- - The original index of the document in the input list - relevance_score: - type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: |- + A choice from an OpenAI-compatible completion response. 
+ + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice + ConversationItem: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + OpenAIResponseAnnotationCitation: + properties: + type: + type: string + const: url_citation + title: Type + default: url_citation + end_index: + type: integer + title: End Index + start_index: + type: integer + title: Start Index + title: + type: string + title: Title + url: + type: string + title: Url + type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. 
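+    # Illustrative url_citation annotation instance (all values invented for
+    # the example):
+    #   type: url_citation
+    #   start_index: 0
+    #   end_index: 42
+    #   title: "Example page"
+    #   url: "https://example.com/article"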
+ OpenAIResponseAnnotationContainerFileCitation: + properties: + type: + type: string + const: container_file_citation + title: Type + default: container_file_citation + container_id: + type: string + title: Container Id + end_index: + type: integer + title: End Index + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + start_index: + type: integer + title: Start Index + type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + properties: + type: + type: string + const: file_citation + title: Type + default: file_citation + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + index: + type: integer + title: Index + type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. + OpenAIResponseAnnotationFilePath: + properties: + type: + type: string + const: file_path + title: Type + default: file_path + file_id: + type: string + title: File Id + index: + type: integer + title: Index + type: object + required: + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + OpenAIResponseContentPartRefusal: + properties: + type: + type: string + const: refusal + title: Type + default: refusal + refusal: + type: string + title: Refusal + type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: + properties: + call_id: + type: string + title: Call Id + output: + type: string + title: Output + type: + type: string + const: function_call_output + title: Type + default: function_call_output + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. 
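+    # Sketch of a function_call_output item as it would be passed back to the
+    # model (call_id and output are hypothetical):
+    #   type: function_call_output
+    #   call_id: "call_abc123"
+    #   output: '{"temperature_c": 21}'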
+ OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: + properties: + detail: + title: Detail + default: auto + type: string + enum: + - low + - high + - auto + type: + type: string + const: input_image + title: Type + default: input_image + file_id: + anyOf: + - type: string + - type: 'null' + image_url: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + properties: + text: + type: string + title: Text + type: + type: string + const: input_text + title: Type + default: input_text + type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: + properties: + arguments: + type: string + title: Arguments + id: + type: string + title: Id + name: + type: string + title: Name + server_label: + type: string + title: Server Label + type: + type: string + const: mcp_approval_request + title: Type + default: mcp_approval_request + type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + properties: + approval_request_id: + type: string + title: Approval Request Id + approve: + type: boolean + title: Approve + type: + type: string + const: mcp_approval_response + title: Type + default: mcp_approval_response + id: + anyOf: + - type: string + - type: 'null' + reason: + anyOf: + - type: string + - type: 'null' + type: object + required: + - approval_request_id + - approve + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ properties: + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + const: message + default: message + title: Type + type: string + id: + anyOf: + - type: string + - type: 'null' + nullable: true + status: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + - role + title: OpenAIResponseMessage + type: object + OpenAIResponseOutputMessageContent: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: + properties: + text: + type: string + title: Text + type: + type: string + const: output_text + title: Type + default: output_text + annotations: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: 
type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + type: array + title: Annotations + type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: + properties: + id: + type: string + title: Id + queries: + items: + type: string + type: array + title: Queries + status: + type: string + title: Status + type: + type: string + const: file_search_call + title: Type + default: file_search_call + results: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' + type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: + properties: + call_id: + type: string + title: Call Id + name: + type: string + title: Name + arguments: + type: string + title: Arguments + type: + type: string + const: function_call + title: Type + default: function_call + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPCall: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_call + title: Type + default: mcp_call + arguments: + type: string + title: Arguments + name: + type: string + title: Name + server_label: + type: string + title: Server Label + error: + anyOf: + - type: string + - type: 'null' + output: + anyOf: + - type: string + - type: 'null' + type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_list_tools + title: Type + default: mcp_list_tools + server_label: + type: string + title: Server Label + tools: + items: + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools + type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. + OpenAIResponseOutputMessageWebSearchToolCall: + properties: + id: + type: string + title: Id + status: + type: string + title: Status + type: + type: string + const: web_search_call + title: Type + default: web_search_call + type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + Conversation: + properties: + id: + type: string + title: Id + description: The unique ID of the conversation. + object: + type: string + const: conversation + title: Object + description: The object type, which is always conversation. 
+ default: conversation + created_at: + type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object + required: + - id + - created_at + title: Conversation + description: OpenAI-compatible conversation object. + ConversationDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted conversation identifier + object: + type: string + title: Object + description: Object type + default: conversation.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationDeletedResource + description: Response for deleted conversation. + ConversationItemList: + properties: + object: + type: string + title: Object + description: Object type + default: list + data: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(9 variants) + type: array + title: Data + description: List of conversation items + first_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list + has_more: + type: boolean + title: Has More + description: Whether there are more items available + default: false + type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + ConversationItemDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted item identifier + object: + type: string + title: Object + description: Object type + default: conversation.item.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationItemDeletedResource + description: Response for deleted conversation item. + OpenAIEmbeddingsRequestWithExtraBody: + properties: + model: + type: string + title: Model + input: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + encoding_format: + anyOf: + - type: string + - type: 'null' + default: float + dimensions: + anyOf: + - type: integer + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: + properties: + object: + type: string + const: embedding + title: Object + default: embedding + embedding: + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string + index: + type: integer + title: Index + type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + total_tokens: + type: integer + title: Total Tokens + type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: + properties: + object: + type: string + const: list + title: Object + default: list + data: + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + type: array + title: Data + model: + type: string + title: Model + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + type: object + required: + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: Response from an OpenAI-compatible embeddings request. + OpenAIFilePurpose: + type: string + enum: + - assistants + - batch + title: OpenAIFilePurpose + description: Valid purpose values for OpenAI Files API. + ListOpenAIFileResponse: properties: data: - type: array items: - $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false - required: - - data - title: RerankResponse - description: Response from a reranking request. 
- Checkpoint: + $ref: '#/components/schemas/OpenAIFileObject' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: + properties: + object: + type: string + const: file + title: Object + default: file + id: + type: string + title: Id + bytes: + type: integer + title: Bytes + created_at: + type: integer + title: Created At + expires_at: + type: integer + title: Expires At + filename: + type: string + title: Filename + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: + properties: + anchor: + type: string + const: created_at + title: Anchor + seconds: + type: integer + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object + required: + - anchor + - seconds + title: ExpiresAfter + description: |- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + OpenAIFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + const: file + title: Object + default: file + deleted: + type: boolean + title: Deleted + type: object + required: + - id + - deleted + title: OpenAIFileDeleteResponse + description: Response for deleting a file in OpenAI Files API. + HealthInfo: + properties: + status: + $ref: '#/components/schemas/HealthStatus' + type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: + properties: + route: + type: string + title: Route + method: + type: string + title: Method + provider_types: + items: + type: string + type: array + title: Provider Types + type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: + properties: + data: + items: + $ref: '#/components/schemas/RouteInfo' + type: array + title: Data + type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: model + title: Type + default: model + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + type: object + required: + - identifier + - provider_id + title: Model + description: A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: Enumeration of supported model types in Llama Stack. + ModerationObject: + properties: + id: + type: string + title: Id + model: + type: string + title: Model + results: + items: + $ref: '#/components/schemas/ModerationObjectResults' + type: array + title: Results + type: object + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + properties: + flagged: + type: boolean + title: Flagged + categories: + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' + category_applied_input_types: + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' + category_scores: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - flagged + title: ModerationObjectResults + description: A moderation object. + Prompt: + properties: + prompt: + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders + version: + type: integer + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) + prompt_id: + type: string + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' + variables: + items: + type: string + type: array + title: Variables + description: List of variable names that can be used in the prompt template + is_default: + type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version + default: false + type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. 
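+    # Hypothetical Prompt resource matching the schema above; the prompt_id
+    # hash is left as the documented placeholder and the template syntax is
+    # illustrative:
+    #   prompt_id: "pmpt_<48-digit-hash>"
+    #   version: 1
+    #   prompt: "Summarize {{ document }} in one paragraph."
+    #   variables: ["document"]
+    #   is_default: true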
+ ListPromptsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + type: object + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ProviderInfo: + properties: + api: + type: string + title: Api + provider_id: + type: string + title: Provider Id + provider_type: + type: string + title: Provider Type + config: + additionalProperties: true + type: object + title: Config + health: + additionalProperties: true + type: object + title: Health + type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: + properties: + data: + items: + $ref: '#/components/schemas/ProviderInfo' + type: array + title: Data + type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: + properties: + code: + type: string + title: Code + message: + type: string + title: Message + type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: + properties: + type: + type: string + const: file_search + title: Type + default: file_search + vector_store_ids: + items: + type: string + type: array + title: Vector Store Ids + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: + properties: + type: + type: string + const: function + title: Type + default: function + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + parameters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. + OpenAIResponseInputToolWebSearch: + properties: + type: + title: Type + default: web_search + type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' + default: medium + type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. 
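+    # Minimal web_search tool configuration sketch (uses the schema's default
+    # search_context_size):
+    #   type: web_search
+    #   search_context_size: medium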
+ OpenAIResponseObjectWithInput: + properties: + created_at: + type: integer + title: Created At + error: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError + id: + type: string + title: Id + model: + type: string + title: Model + object: + type: string + const: response + title: Object + default: response + output: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output + parallel_tool_calls: + type: boolean + title: Parallel Tool Calls + default: false + previous_response_id: + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt + status: + type: string + title: Status + temperature: + anyOf: + - type: number + - type: 'null' + text: + $ref: '#/components/schemas/OpenAIResponseText' + default: + format: + type: text + top_p: + anyOf: + - type: number + - type: 'null' + tools: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' + truncation: + anyOf: + - type: string + - type: 'null' + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage + instructions: + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + input: + items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object + required: + - created_at + - id + - model + - output + - status + - input + title: OpenAIResponseObjectWithInput + description: OpenAI response object extended with input context information. 
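+    # Skeletal OpenAIResponseObjectWithInput showing only the required fields
+    # (id, timestamp, model, and status values are invented):
+    #   id: "resp_123"
+    #   created_at: 1730000000
+    #   model: "example-model"
+    #   status: "completed"
+    #   output: []
+    #   input: []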
+ OpenAIResponseOutput: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + OpenAIResponsePrompt: + properties: + id: + type: string + title: Id + variables: + anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' + version: + anyOf: + - type: string + - type: 'null' + type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. + OpenAIResponseText: + properties: + format: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object + title: OpenAIResponseText + description: Text response configuration for OpenAI responses. 
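+    # The text configuration used when no format is specified (mirrors the
+    # `default` shown in the response schemas above):
+    #   format:
+    #     type: text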
+ OpenAIResponseTool: + discriminator: + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + OpenAIResponseToolMCP: + properties: + type: + type: string + const: mcp + title: Type + default: mcp + server_label: + type: string + title: Server Label + allowed_tools: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter + type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. + OpenAIResponseUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + output_tokens: + type: integer + title: Output Tokens + total_tokens: + type: integer + title: Total Tokens + input_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails + output_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object + required: + - input_tokens + - output_tokens + - total_tokens + title: OpenAIResponseUsage + description: Usage information for OpenAI response. + ResponseGuardrailSpec: + description: Specification for a guardrail to apply during response generation. 
+ properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + OpenAIResponseInputTool: + discriminator: + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + OpenAIResponseInputToolMCP: + properties: + type: + type: string + const: mcp + title: Type + default: mcp + server_label: + type: string + title: Server Label + server_url: + type: string + title: Server Url + headers: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + authorization: + anyOf: + - type: string + - type: 'null' + require_approval: + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter + default: never + allowed_tools: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter + type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. 
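+    # Illustrative mcp tool configuration (server_label, server_url, and the
+    # allowed tool name are invented for the example):
+    #   type: mcp
+    #   server_label: "docs"
+    #   server_url: "https://mcp.example.com/sse"
+    #   require_approval: never
+    #   allowed_tools: ["search_docs"]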
+ OpenAIResponseObject: + properties: + created_at: + type: integer + title: Created At + error: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError + id: + type: string + title: Id + model: + type: string + title: Model + object: + type: string + const: response + title: Object + default: response + output: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output + parallel_tool_calls: + type: boolean + title: Parallel Tool Calls + default: false + previous_response_id: + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt + status: + type: string + title: Status + temperature: + anyOf: + - type: number + - type: 'null' + text: + $ref: '#/components/schemas/OpenAIResponseText' + default: + format: + type: text + top_p: + anyOf: + - type: number + - type: 'null' + tools: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' + truncation: + anyOf: + - type: string + - type: 'null' + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage + instructions: + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. + properties: + type: + const: output_text + default: output_text + title: Type + type: string + text: + title: Text + type: string + annotations: + items: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array + logprobs: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true + required: + - text + title: OpenAIResponseContentPartOutputText + type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. + properties: + type: + const: summary_text + default: summary_text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: OpenAIResponseContentPartReasoningSummary + type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. 
+ properties: + type: + const: reasoning_text + default: reasoning_text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: OpenAIResponseContentPartReasoningText + type: object + OpenAIResponseObjectStream: + discriminator: + mapping: + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: 
'#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: 
OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + const: response.completed + default: response.completed + title: Type + type: string + required: + - response + title: OpenAIResponseObjectStreamResponseCompleted + type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. 
+ properties: + content_index: + title: Content Index + type: integer + response_id: + title: Response Id + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + sequence_number: + title: Sequence Number + type: integer + type: + const: response.content_part.added + default: response.content_part.added + title: Type + type: string + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartAdded + type: object + OpenAIResponseObjectStreamResponseContentPartDone: + description: Streaming event for when a content part is completed. + properties: + content_index: + title: Content Index + type: integer + response_id: + title: Response Id + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + sequence_number: + title: Sequence Number + type: integer + type: + const: response.content_part.done + default: response.content_part.done + title: Type + type: string + required: + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartDone + type: object + OpenAIResponseObjectStreamResponseCreated: + description: Streaming event indicating a new response has been created. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + const: response.created + default: response.created + title: Type + type: string + required: + - response + title: OpenAIResponseObjectStreamResponseCreated + type: object + OpenAIResponseObjectStreamResponseFailed: + description: Streaming event emitted when a response fails. 
+ properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.failed + default: response.failed + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseFailed + type: object + OpenAIResponseObjectStreamResponseFileSearchCallCompleted: + description: Streaming event for completed file search calls. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.completed + default: response.file_search_call.completed + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + type: object + OpenAIResponseObjectStreamResponseFileSearchCallInProgress: + description: Streaming event for file search calls in progress. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.in_progress + default: response.file_search_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + type: object + OpenAIResponseObjectStreamResponseFileSearchCallSearching: + description: Streaming event for file search currently searching. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.file_search_call.searching + default: response.file_search_call.searching + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta: + description: Streaming event for incremental function call argument updates. + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.function_call_arguments.delta + default: response.function_call_arguments.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone: + description: Streaming event for when function call arguments are completed. + properties: + arguments: + title: Arguments + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.function_call_arguments.done + default: response.function_call_arguments.done + title: Type + type: string + required: + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. 
+ properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.in_progress + default: response.in_progress + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress + type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + sequence_number: + title: Sequence Number + type: integer + type: + const: response.incomplete + default: response.incomplete + title: Type + type: string + required: + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete + type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.arguments.delta + default: response.mcp_call.arguments.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: + properties: + arguments: + title: Arguments + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.arguments.done + default: response.mcp_call.arguments.done + title: Type + type: string + required: + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.completed + default: response.mcp_call.completed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.failed + default: response.mcp_call.failed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed + type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_call.in_progress + default: response.mcp_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + type: object + OpenAIResponseObjectStreamResponseMcpListToolsCompleted: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.completed + default: response.mcp_list_tools.completed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + type: object + OpenAIResponseObjectStreamResponseMcpListToolsFailed: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.failed + default: response.mcp_list_tools.failed + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + type: object + OpenAIResponseObjectStreamResponseMcpListToolsInProgress: + properties: + sequence_number: + title: Sequence Number + type: integer + type: + const: response.mcp_list_tools.in_progress + default: response.mcp_list_tools.in_progress + title: Type + type: string + required: + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + type: object + OpenAIResponseObjectStreamResponseOutputItemAdded: + description: Streaming event for when a new output item is added to the response. + properties: + response_id: + title: Response Id + type: string + item: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_item.added + default: response.output_item.added + title: Type + type: string + required: + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemAdded + type: object + OpenAIResponseObjectStreamResponseOutputItemDone: + description: Streaming event for when an output item is completed. + properties: + response_id: + title: Response Id + type: string + item: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_item.done + default: response.output_item.done + title: Type + type: string + required: + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemDone + type: object + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded: + description: Streaming event for when an annotation is added to output text. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + content_index: + title: Content Index + type: integer + annotation_index: + title: Annotation Index + type: integer + annotation: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.annotation.added + default: response.output_text.annotation.added + title: Type + type: string + required: + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + type: object + OpenAIResponseObjectStreamResponseOutputTextDelta: + description: Streaming event for incremental text content updates. + properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.delta + default: response.output_text.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDelta + type: object + OpenAIResponseObjectStreamResponseOutputTextDone: + description: Streaming event for when text output is completed. + properties: + content_index: + title: Content Index + type: integer + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.output_text.done + default: response.output_text.done + title: Type + type: string + required: + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDone + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded: + description: Streaming event for when a new reasoning summary part is added. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_part.added + default: response.reasoning_summary_part.added + title: Type + type: string + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone: + description: Streaming event for when a reasoning summary part is completed. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + part: + $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_part.done + default: response.reasoning_summary_part.done + title: Type + type: string + required: + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta: + description: Streaming event for incremental reasoning summary text updates. + properties: + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_text.delta + default: response.reasoning_summary_text.delta + title: Type + type: string + required: + - delta + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone: + description: Streaming event for when reasoning summary text is completed. + properties: + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + summary_index: + title: Summary Index + type: integer + type: + const: response.reasoning_summary_text.done + default: response.reasoning_summary_text.done + title: Type + type: string + required: + - text + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + type: object + OpenAIResponseObjectStreamResponseReasoningTextDelta: + description: Streaming event for incremental reasoning text updates. 
+ properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.reasoning_text.delta + default: response.reasoning_text.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + type: object + OpenAIResponseObjectStreamResponseReasoningTextDone: + description: Streaming event for when reasoning text is completed. + properties: + content_index: + title: Content Index + type: integer + text: + title: Text + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.reasoning_text.done + default: response.reasoning_text.done + title: Type + type: string + required: + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDone + type: object + OpenAIResponseObjectStreamResponseRefusalDelta: + description: Streaming event for incremental refusal text updates. + properties: + content_index: + title: Content Index + type: integer + delta: + title: Delta + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.refusal.delta + default: response.refusal.delta + title: Type + type: string + required: + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDelta + type: object + OpenAIResponseObjectStreamResponseRefusalDone: + description: Streaming event for when refusal text is completed. + properties: + content_index: + title: Content Index + type: integer + refusal: + title: Refusal + type: string + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.refusal.done + default: response.refusal.done + title: Type + type: string + required: + - content_index + - refusal + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDone + type: object + OpenAIResponseObjectStreamResponseWebSearchCallCompleted: + description: Streaming event for completed web search calls. + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.completed + default: response.web_search_call.completed + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. 
+ properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.in_progress + default: response.web_search_call.in_progress + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: + properties: + item_id: + title: Item Id + type: string + output_index: + title: Output Index + type: integer + sequence_number: + title: Sequence Number + type: integer + type: + const: response.web_search_call.searching + default: response.web_search_call.searching + title: Type + type: string + required: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + type: object + OpenAIDeleteResponseObject: + properties: + id: + type: string + title: Id + object: + type: string + const: response + title: Object + default: response + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. + ListOpenAIResponseInputItem: + properties: + data: + items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Data + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + title: ListOpenAIResponseInputItem + description: List container for OpenAI response input items. + RunShieldResponse: + properties: + violation: + anyOf: + - $ref: '#/components/schemas/SafetyViolation' + title: SafetyViolation + - type: 'null' + title: SafetyViolation + type: object + title: RunShieldResponse + description: Response from running a safety shield. + SafetyViolation: + properties: + violation_level: + $ref: '#/components/schemas/ViolationLevel' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - violation_level + title: SafetyViolation + description: Details of a safety violation detected by content moderation. + ViolationLevel: + type: string + enum: + - info + - warn + - error + title: ViolationLevel + description: Severity level of a safety violation. + AggregationFunctionType: + type: string + enum: + - average + - weighted_average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: Types of aggregation functions for scoring results. + ArrayType: + properties: + type: + type: string + const: array + title: Type + default: array + type: object + title: ArrayType + description: Parameter type for array values. + BasicScoringFnParams: + properties: + type: + type: string + const: basic + title: Type + default: basic + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: + properties: + type: + type: string + const: boolean + title: Type + default: boolean + type: object + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + properties: + type: + type: string + const: chat_completion_input + title: Type + default: chat_completion_input + type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: + properties: + type: + type: string + const: completion_input + title: Type + default: completion_input + type: object + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + properties: + type: + type: string + const: json + title: Type + default: json + type: object + title: JsonType + description: Parameter type for JSON values. 
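+ # NOTE (editorial, illustrative only, not part of the generated spec): the
+ # scoring param schemas below select an aggregation to apply over row scores;
+ # a BasicScoringFnParams value that averages row scores might look like:
+ #   type: basic
+ #   aggregation_functions:
+ #     - average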
+ LLMAsJudgeScoringFnParams: + properties: + type: + type: string + const: llm_as_judge + title: Type + default: llm_as_judge + judge_model: + type: string + title: Judge Model + prompt_template: + anyOf: + - type: string + - type: 'null' + judge_score_regexes: + items: + type: string + type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + required: + - judge_model + title: LLMAsJudgeScoringFnParams + description: Parameters for LLM-as-judge scoring function configuration. + NumberType: + properties: + type: + type: string + const: number + title: Type + default: number + type: object + title: NumberType + description: Parameter type for numeric values. + ObjectType: + properties: + type: + type: string + const: object + title: Type + default: object + type: object + title: ObjectType + description: Parameter type for object values. + RegexParserScoringFnParams: + properties: + type: + type: string + const: regex_parser + title: Type + default: regex_parser + parsing_regexes: + items: + type: string + type: array + title: Parsing Regexes + description: Regex to extract the answer from generated response + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + title: RegexParserScoringFnParams + description: Parameters for regex parser scoring function configuration. + ScoringFn: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: scoring_function + title: Type + default: scoring_function + description: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this definition + return_type: + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants) + description: The return type of the deterministic function + discriminator: + propertyName: type + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval + type: object + required: + - identifier + - provider_id + - return_type + title: ScoringFn + description: A scoring function resource for evaluating model outputs. + ScoringFnParams: + discriminator: + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + ScoringFnParamsType: + description: Types of scoring function parameter configurations. + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType + type: string + StringType: + properties: + type: + type: string + const: string + title: Type + default: string + type: object + title: StringType + description: Parameter type for string values. + UnionType: + properties: + type: + type: string + const: union + title: Type + default: union + type: object + title: UnionType + description: Parameter type for union values. + ListScoringFunctionsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ScoringFn' + type: array + title: Data + type: object + required: + - data + title: ListScoringFunctionsResponse + ScoreResponse: + properties: + results: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Results + type: object + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoringResult: + properties: + score_rows: + items: + additionalProperties: true + type: object + type: array + title: Score Rows + aggregated_results: + additionalProperties: true + type: object + title: Aggregated Results + type: object + required: + - score_rows + - aggregated_results + title: ScoringResult + description: A scoring result for a single row. 
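+ # NOTE (editorial, illustrative only, not part of the generated spec): a
+ # ScoringResult for a single scored row, aggregated with the "accuracy"
+ # function, might look like this (all values hypothetical):
+ #   score_rows:
+ #     - score: 1.0
+ #   aggregated_results:
+ #     accuracy: 1.0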
+ ScoreBatchResponse:
+ properties:
+ dataset_id:
+ anyOf:
+ - type: string
+ - type: 'null'
+ results:
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringResult'
+ type: object
+ title: Results
+ type: object
+ required:
+ - results
+ title: ScoreBatchResponse
+ description: Response from batch scoring operations on datasets.
+ Shield:
+ properties:
+ identifier:
+ type: string
+ title: Identifier
+ description: Unique identifier for this resource in llama stack
+ provider_resource_id:
+ anyOf:
+ - type: string
+ - type: 'null'
+ description: Unique identifier for this resource in the provider
+ provider_id:
+ type: string
+ title: Provider Id
+ description: ID of the provider that owns this resource
+ type:
+ type: string
+ const: shield
+ title: Type
+ default: shield
+ params:
+ anyOf:
+ - additionalProperties: true
+ type: object
+ - type: 'null'
+ type: object
+ required:
+ - identifier
+ - provider_id
+ title: Shield
+ description: A safety shield resource that can be used to check content.
+ ListShieldsResponse:
+ properties:
+ data:
+ items:
+ $ref: '#/components/schemas/Shield'
+ type: array
+ title: Data
+ type: object
+ required:
+ - data
+ title: ListShieldsResponse
+ ImageContentItem:
+ description: An image content item
+ properties:
+ type:
+ const: image
+ default: image
+ title: Type
+ type: string
+ image:
+ $ref: '#/components/schemas/_URLOrData'
+ required:
+ - image
+ title: ImageContentItem
+ type: object
+ InterleavedContent:
+ anyOf:
+ - type: string
+ - discriminator:
+ mapping:
+ image: '#/components/schemas/ImageContentItem'
+ text: '#/components/schemas/TextContentItem'
+ propertyName: type
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem'
+ title: ImageContentItem
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ title: ImageContentItem | TextContentItem
+ - items:
+ discriminator:
+ mapping:
+ image: '#/components/schemas/ImageContentItem'
+ text: '#/components/schemas/TextContentItem'
+ propertyName: type
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem'
+ title: ImageContentItem
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ title: ImageContentItem | TextContentItem
+ type: array
+ title: list[ImageContentItem | TextContentItem]
+ title: string | list[ImageContentItem | TextContentItem]
+ InterleavedContentItem:
+ discriminator:
+ mapping:
+ image: '#/components/schemas/ImageContentItem'
+ text: '#/components/schemas/TextContentItem'
+ propertyName: type
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem'
+ title: ImageContentItem
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ title: ImageContentItem | TextContentItem
+ TextContentItem:
+ properties:
+ type:
+ type: string
+ const: text
+ title: Type
+ default: text
+ text:
+ type: string
+ title: Text
+ type: object
+ required:
+ - text
+ title: TextContentItem
+ description: A text content item
+ ToolInvocationResult:
+ properties:
+ content:
+ anyOf:
+ - type: string
+ - oneOf:
+ - $ref: '#/components/schemas/ImageContentItem-Output'
+ title: ImageContentItem-Output
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ discriminator:
+ propertyName: type
+ mapping:
+ image: '#/components/schemas/ImageContentItem-Output'
+ text: '#/components/schemas/TextContentItem'
+ title: ImageContentItem-Output | TextContentItem
+ - items:
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem-Output'
+ title: ImageContentItem-Output
+ - $ref:
'#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] + error_message: + anyOf: + - type: string + - type: 'null' + error_code: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: ToolInvocationResult + description: Result of a tool invocation. + URL: + properties: + uri: + type: string + title: Uri + type: object + required: + - uri + title: URL + description: A URL reference to external content. + ToolDef: + properties: + toolgroup_id: + anyOf: + - type: string + - type: 'null' + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + output_schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ToolDef' + type: array + title: Data + type: object + required: + - data + title: ListToolDefsResponse + description: Response containing a list of tool definitions. + ToolGroup: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: tool_group + title: Type + default: tool_group + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. + ListToolGroupsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ToolGroup' + type: array + title: Data + type: object + required: + - data + title: ListToolGroupsResponse + description: Response containing a list of tool groups. + Chunk: + description: A chunk of content that can be inserted into a vector database. 
+ properties:
+ content:
+ anyOf:
+ - type: string
+ - discriminator:
+ mapping:
+ image: '#/components/schemas/ImageContentItem'
+ text: '#/components/schemas/TextContentItem'
+ propertyName: type
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem'
+ title: ImageContentItem
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ title: ImageContentItem | TextContentItem
+ - items:
+ discriminator:
+ mapping:
+ image: '#/components/schemas/ImageContentItem'
+ text: '#/components/schemas/TextContentItem'
+ propertyName: type
+ oneOf:
+ - $ref: '#/components/schemas/ImageContentItem'
+ title: ImageContentItem
+ - $ref: '#/components/schemas/TextContentItem'
+ title: TextContentItem
+ title: ImageContentItem | TextContentItem
+ type: array
+ title: list[ImageContentItem | TextContentItem]
+ title: string | list[ImageContentItem | TextContentItem]
+ chunk_id:
+ title: Chunk Id
+ type: string
+ metadata:
+ additionalProperties: true
+ title: Metadata
+ type: object
+ embedding:
+ anyOf:
+ - items:
+ type: number
+ type: array
+ - type: 'null'
+ nullable: true
+ chunk_metadata:
+ anyOf:
+ - $ref: '#/components/schemas/ChunkMetadata'
+ title: ChunkMetadata
+ - type: 'null'
+ nullable: true
+ title: ChunkMetadata
+ required:
+ - content
+ - chunk_id
+ title: Chunk
+ type: object
+ ChunkMetadata:
+ properties:
+ chunk_id:
+ anyOf:
+ - type: string
+ - type: 'null'
+ document_id:
+ anyOf:
+ - type: string
+ - type: 'null'
+ source:
+ anyOf:
+ - type: string
+ - type: 'null'
+ created_timestamp:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ updated_timestamp:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ chunk_window:
+ anyOf:
+ - type: string
+ - type: 'null'
+ chunk_tokenizer:
+ anyOf:
+ - type: string
+ - type: 'null'
+ chunk_embedding_model:
+ anyOf:
+ - type: string
+ - type: 'null'
+ chunk_embedding_dimension:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ content_token_count:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ metadata_token_count:
+ anyOf:
+ - type: integer
+ - type: 'null'
+ type: object
+ title: ChunkMetadata
+ description: |-
+ `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+ will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+ is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change afterwards.
+ Use `Chunk.metadata` for metadata that will be used in the context during inference.
+ QueryChunksResponse:
+ properties:
+ chunks:
+ items:
+ $ref: '#/components/schemas/Chunk-Output'
+ type: array
+ title: Chunks
+ scores:
+ items:
+ type: number
+ type: array
+ title: Scores
+ type: object
+ required:
+ - chunks
+ - scores
+ title: QueryChunksResponse
+ description: Response from querying chunks in a vector database.
+ VectorStoreFileCounts:
+ properties:
+ completed:
+ type: integer
+ title: Completed
+ cancelled:
+ type: integer
+ title: Cancelled
+ failed:
+ type: integer
+ title: Failed
+ in_progress:
+ type: integer
+ title: In Progress
+ total:
+ type: integer
+ title: Total
+ type: object
+ required:
+ - completed
+ - cancelled
+ - failed
+ - in_progress
+ - total
+ title: VectorStoreFileCounts
+ description: File processing status counts for a vector store.
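+ # NOTE (editorial, illustrative only, not part of the generated spec): all five
+ # VectorStoreFileCounts fields are required, and total presumably tallies the
+ # other four; hypothetical values:
+ #   completed: 12
+ #   in_progress: 2
+ #   cancelled: 0
+ #   failed: 1
+ #   total: 15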
+ VectorStoreListResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreListResponse + description: Response from listing vector stores. + VectorStoreObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store + created_at: + type: integer + title: Created At + name: + anyOf: + - type: string + - type: 'null' + usage_bytes: + type: integer + title: Usage Bytes + default: 0 + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + status: + type: string + title: Status + default: completed + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + last_active_at: + anyOf: + - type: integer + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - id + - created_at + - file_counts + title: VectorStoreObject + description: OpenAI Vector Store object. + VectorStoreChunkingStrategy: + discriminator: + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + VectorStoreChunkingStrategyAuto: + properties: + type: + type: string + const: auto + title: Type + default: auto + type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + properties: + type: + type: string + const: static + title: Type + default: static + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + properties: + chunk_overlap_tokens: + type: integer + title: Chunk Overlap Tokens + default: 400 + max_chunk_size_tokens: + type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens + default: 800 + type: object + title: VectorStoreChunkingStrategyStaticConfig + description: Configuration for static chunking strategy. 
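The static chunking strategy above bounds `max_chunk_size_tokens` to the range 100..4096. A sketch of both discriminated variants, with values chosen only for illustration:

```python
# The discriminator is `type`. "auto" carries no configuration; "static"
# nests its parameters under `static` (defaults: 400 overlap, 800 max tokens).
auto_strategy = {"type": "auto"}
static_strategy = {
    "type": "static",
    "static": {
        "chunk_overlap_tokens": 200,
        "max_chunk_size_tokens": 1024,  # must stay within [100, 4096]
    },
}
```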
+ OpenAICreateVectorStoreRequestWithExtraBody: + properties: + name: + anyOf: + - type: string + - type: 'null' + file_ids: + anyOf: + - items: + type: string + type: array + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true + type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + VectorStoreDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: + properties: + file_ids: + items: + type: string + type: array + title: File Ids + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true + type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file_batch + created_at: + type: integer + title: Created At + vector_store_id: + type: string + title: Vector Store Id + status: + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + file_counts: + $ref: '#/components/schemas/VectorStoreFileCounts' + type: object + required: + - id + - created_at + - vector_store_id + - status + - file_counts + title: VectorStoreFileBatchObject + description: OpenAI Vector Store File Batch object. + VectorStoreFileStatus: + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + VectorStoreFileLastError: + properties: + code: + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error + message: + type: string + title: Message + type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. 
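A hedged sketch of exercising `OpenAICreateVectorStoreRequestWithExtraBody` over plain HTTP. The base URL and the `/v1/vector_stores` path are assumptions based on the OpenAI-compatible surface this spec mirrors, not something this diff pins down:

```python
import requests

# Assumed endpoint; adjust the base URL to your deployment.
resp = requests.post(
    "http://localhost:8321/v1/vector_stores",
    json={
        "name": "docs-store",                   # all fields are optional
        "chunking_strategy": {"type": "auto"},
        "metadata": {"team": "ml"},
    },
)
store = resp.json()  # shaped like VectorStoreObject
print(store["id"], store["file_counts"]["total"])
```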
+ VectorStoreFileObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file + attributes: + additionalProperties: true + type: object + title: Attributes + chunking_strategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + created_at: + type: integer + title: Created At + last_error: + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError + status: + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + usage_bytes: + type: integer + title: Usage Bytes + default: 0 + vector_store_id: + type: string + title: Vector Store Id + type: object + required: + - id + - chunking_strategy + - created_at + - status + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. + VectorStoreFilesListInBatchResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreFilesListInBatchResponse + description: Response from listing files in a vector store file batch. + VectorStoreListFilesResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + VectorStoreFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: + properties: + type: + type: string + const: text + title: Type + text: + type: string + title: Text + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. 
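The list responses above share the same cursor fields (`first_id`, `last_id`, `has_more`). A minimal pagination sketch, under the assumption that an `after` query parameter accepts the previous page's `last_id`, as in the OpenAI API:

```python
import requests

def iter_store_files(base_url: str, store_id: str):
    """Walk a VectorStoreListFilesResponse-shaped listing page by page."""
    after = None
    while True:
        params = {"after": after} if after else {}
        page = requests.get(
            f"{base_url}/v1/vector_stores/{store_id}/files", params=params
        ).json()
        yield from page["data"]        # VectorStoreFileObject items
        if not page.get("has_more"):
            break
        after = page["last_id"]        # cursor for the next page
```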
+ VectorStoreFileContentResponse: + properties: + object: + type: string + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page + data: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: + properties: + version: + type: string + title: Version + type: object + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + properties: + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
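Since this patch changes the file-contents endpoint to return `VectorStoreFileContentResponse`, a sketch of consuming the new shape; the URL path is an assumption:

```python
import requests

# Assumed path for the file-contents endpoint.
page = requests.get(
    "http://localhost:8321/v1/vector_stores/vs_123/files/file_456/content"
).json()

assert page["object"] == "vector_store.file_content.page"
for item in page["data"]:       # VectorStoreContent items (type is "text")
    print(item["text"])
# `has_more` / `next_page` signal additional pages, mirroring the list APIs.
```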
+ Dataset: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + properties: + type: + type: string + const: rows + title: Type + default: rows + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + properties: + type: + type: string + const: uri + title: Type + default: uri + uri: + type: string + title: Uri + type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data + type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
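The `source` union is discriminated on `type` (`uri` vs `rows`). Two illustrative payloads, taken from the examples in the description text this diff removes:

```python
# A dataset fetched from a URI versus one supplied inline as rows.
uri_source = {"type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"}
rows_source = {
    "type": "rows",
    "rows": [
        {
            "messages": [
                {"role": "user", "content": "Hello, world!"},
                {"role": "assistant", "content": "Hello, world!"},
            ]
        }
    ],
}
```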
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: SamplingParams + description: Sampling parameters. 
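Putting the three schemas together, a hypothetical `BenchmarkConfig` payload; the model id is invented:

```python
benchmark_config = {
    "eval_candidate": {                       # ModelCandidate
        "type": "model",
        "model": "llama-3.2-3b-instruct",     # hypothetical model id
        "sampling_params": {
            "strategy": {"type": "greedy"},
            "max_tokens": 512,
        },
    },
    "scoring_params": {},  # scoring-fn id -> LLMAsJudge/RegexParser/Basic params
    "num_examples": 10,    # evaluate a small sample while testing
}
```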
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: + properties: + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. 
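Note that `TopPSamplingStrategy` lists `temperature` as required even though it is nullable. A sketch of each strategy variant for reference:

```python
greedy = {"type": "greedy"}
top_p = {"type": "top_p", "temperature": 0.7, "top_p": 0.9}  # temperature is required
top_k = {"type": "top_k", "top_k": 40}                       # top_k must be >= 1
```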
+ RerankRequest: + properties: + model: + type: string + title: Model + query: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + type: array + title: Items + max_num_results: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - model + - query + - items + title: RerankRequest + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + properties: + identifier: + type: string + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. 
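A minimal `RerankRequest` body per the schema above; the model id and documents are illustrative:

```python
rerank_request = {
    "model": "reranker-model",   # hypothetical model id
    "query": "What is Llama Stack?",
    "items": [
        "Llama Stack standardizes the core building blocks of AI apps.",
        "Unrelated text about cooking.",
    ],
    "max_num_results": 1,
}
# The response's `data` holds RerankData entries:
# {"index": <position in items>, "relevance_score": <float>}
```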
PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. 
+ description: Configuration for Direct Preference Optimization (DPO) alignment. DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. 
+ description: Configuration parameters for the optimization algorithm. OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
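A `LoraFinetuningConfig` sketch covering all required fields; the values are common defaults, not recommendations from this patch:

```python
lora_config = {
    "type": "LoRA",
    "lora_attn_modules": ["q_proj", "v_proj"],  # attention modules to adapt
    "apply_lora_to_mlp": False,
    "apply_lora_to_output": False,
    "rank": 8,     # lower rank means fewer trainable parameters
    "alpha": 16,   # scaling factor for the adaptation
}
```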
+ SupervisedFineTuneRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. + title: Logger Config model: - type: string - description: The model to fine-tune. + anyOf: + - type: string + - type: 'null' + description: Model descriptor for training if not in provider config` checkpoint_dir: - type: string - description: The directory to save checkpoint(s) to. + anyOf: + - type: string + - type: 'null' algorithm_config: - $ref: '#/components/schemas/AlgorithmConfig' - description: The algorithm configuration. - additionalProperties: false - required: - - job_uuid - - training_config - - hyperparam_search_config - - logger_config - title: SupervisedFineTuneRequest - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + title: LoraFinetuningConfig | QATFinetuningConfig + - type: 'null' + title: Algorithm Config type: object + required: + - job_uuid + - training_config + - hyperparam_search_config + - logger_config + title: SupervisedFineTuneRequest + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants) + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + AllowedToolsFilter: properties: - purpose: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. + BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
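The `-Input`/`-Output` suffixed schemas (here `Chunk-Output`) are serialization variants of the same model, which is why both carry `title: Chunk`. A sketch of dispatching the image/text content union above on its `type` discriminator; the field access assumes the `TextContentItem` shape used elsewhere in the spec:

```python
def render_content_item(item: dict) -> str:
    """Illustrative handling of the ImageContentItem | TextContentItem union."""
    match item["type"]:
        case "text":
            return item["text"]
        case "image":
            url = item["image"].get("url") or {}  # _URLOrData: url and/or data
            return url.get("uri", "<inline base64 image>")
        case _:
            raise ValueError(f"unknown content type: {item['type']}")
```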
+ DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. + Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. + MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. 
+ OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. + OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role type: string enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. 
Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: + - system + - developer + - user + - assistant + default: system + type: type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest - RegisterBenchmarkRequest: + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: properties: - benchmark_id: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: type: string - description: The ID of the benchmark to register. - dataset_id: + title: File Id + filename: type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. + OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. 
+ OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. 
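A search-body sketch combining `SearchRankingOptions` with the result pages defined earlier. The request field names follow the OpenAI vector-store search API that this spec mirrors, so treat them as assumptions:

```python
search_body = {
    "query": "vector databases",
    "max_num_results": 5,
    "ranking_options": {           # SearchRankingOptions
        "score_threshold": 0.5,    # drop low-relevance hits (default 0.0)
    },
}
# Results come back as a VectorStoreSearchResponsePage whose `data` entries
# pair file_id / filename / score with a list of VectorStoreContent items.
```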
+ _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. + properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + SpanEndPayload: + description: Payload for a span end event. + properties: + type: + const: span_end + default: span_end + title: Type + type: string + status: + $ref: '#/components/schemas/SpanStatus' + required: + - status + title: SpanEndPayload + type: object + SpanStartPayload: + description: Payload for a span start event. + properties: + type: + const: span_start + default: span_start + title: Type + type: string + name: + title: Name + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - name + title: SpanStartPayload + type: object + SpanStatus: + description: The status of a span indicating whether it completed successfully or with an error. 
+ enum: + - ok + - error + title: SpanStatus + type: string + StructuredLogPayload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + LogSeverity: + description: The severity level of a log message. + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + type: string + MetricEvent: + description: A metric event containing a measured value. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: metric + default: metric + title: Type + type: string + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + title: Unit + type: string + required: + - trace_id + - span_id + - timestamp + - metric + - value + - unit + title: MetricEvent + type: object + StructuredLogEvent: + description: A structured log event containing typed payload data. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: structured_log + default: structured_log + title: Type + type: string + payload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + required: + - trace_id + - span_id + - timestamp + - payload + title: StructuredLogEvent + type: object + UnstructuredLogEvent: + description: An unstructured log event containing a simple text message. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... 
(4 variants) + type: object + - type: 'null' + type: + const: unstructured_log + default: unstructured_log + title: Type + type: string + message: + title: Message + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + required: + - trace_id + - span_id + - timestamp + - message + - severity + title: UnstructuredLogEvent + type: object + Event: + discriminator: + mapping: + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + propertyName: type + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + title: UnstructuredLogEvent + - $ref: '#/components/schemas/MetricEvent' + title: MetricEvent + - $ref: '#/components/schemas/StructuredLogEvent' + title: StructuredLogEvent + title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. 
+ properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. 
+ properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: items: type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true metadata: + additionalProperties: true + title: Metadata type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. 
+ properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. 
+ properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. + enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. 
+ title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. + properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object + Span: + description: A span representing a single operation within a trace. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + required: + - span_id + - trace_id + - name + - start_time + title: Span + type: object + Trace: + description: A trace representing the complete execution path of a request across multiple operations. 
+ properties: + trace_id: + title: Trace Id + type: string + root_span_id: + title: Root Span Id + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + required: + - trace_id + - root_span_id + - start_time + title: Trace + type: object + EventType: + description: The type of telemetry event being logged. + enum: + - unstructured_log + - structured_log + - metric + title: EventType + type: string + StructuredLogType: + description: The type of structured log event payload. + enum: + - span_start + - span_end + title: StructuredLogType + type: string + EvalTrace: + description: A trace record for evaluation purposes. + properties: + session_id: + title: Session Id + type: string + step: + title: Step + type: string + input: + title: Input + type: string + output: + title: Output + type: string + expected_output: + title: Expected Output + type: string + required: + - session_id + - step + - input + - output + - expected_output + title: EvalTrace + type: object + SpanWithStatus: + description: A span that includes status information. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/SpanStatus' + title: SpanStatus + - type: 'null' + nullable: true + title: SpanStatus + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + type: object + QueryConditionOp: + description: Comparison operators for query conditions. + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + type: string + QueryCondition: + description: A condition for filtering query results. + properties: + key: + title: Key + type: string + op: + $ref: '#/components/schemas/QueryConditionOp' + value: + title: Value + required: + - key + - op + - value + title: QueryCondition + type: object + MetricLabel: + description: A label associated with a metric. + properties: + name: + title: Name + type: string + value: + title: Value + type: string + required: + - name + - value + title: MetricLabel + type: object + MetricDataPoint: + description: A single data point in a metric time series. + properties: + timestamp: + title: Timestamp + type: integer + value: + title: Value + type: number + unit: + title: Unit + type: string + required: + - timestamp + - value + - unit + title: MetricDataPoint + type: object + MetricSeries: + description: A time series of metric data points. 
+ properties: + metric: + title: Metric + type: string + labels: + items: + $ref: '#/components/schemas/MetricLabel' + title: Labels + type: array + values: + items: + $ref: '#/components/schemas/MetricDataPoint' + title: Values + type: array + required: + - metric + - labels + - values + title: MetricSeries + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -2266,8 +9217,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -2275,11 +9225,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -2287,38 +9235,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Benchmarks - description: '' - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: PostTraining (Coming Soon) - description: '' +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. 
+ name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. + name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Benchmarks - - DatasetIO - - Datasets - - Eval - - PostTraining (Coming Soon) +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 759c7501a..a12ac342f 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1,16 +1,16 @@ openapi: 3.1.0 info: title: Llama Stack Specification - version: v1 - description: >- + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **✅ STABLE**: Production-ready APIs with backward compatibility guarantees. + **✅ STABLE**: Production-ready APIs with backward compatibility guarantees. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1/batches: get: @@ -23,34 +23,37 @@ paths: $ref: '#/components/schemas/ListBatchesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: List all batches for the current user. + - Batches + summary: List Batches description: List all batches for the current user. + operationId: list_batches_v1_batches_get parameters: - - name: after - in: query - description: >- - A cursor for pagination; returns batches after this batch ID. - required: false - schema: - type: string - - name: limit - in: query - description: >- - Number of batches to return (default 20, max 100). 
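The regenerated `/v1/batches` listing above keeps cursor pagination via the `after` and `limit` query parameters (limit defaulting to 20). A minimal sketch of calling it follows; the base URL and the OpenAI-style `data` list envelope are assumptions not shown in this hunk.

```python
import requests

# Sketch: list batches using the `after`/`limit` pagination parameters
# defined above. The base URL and the `data` envelope are assumptions.
BASE = "http://localhost:8321"

page = requests.get(f"{BASE}/v1/batches", params={"limit": 20}).json()
for batch in page.get("data", []):
    print(batch.get("id"))

# To fetch the next page, pass the last batch id as `after`:
# requests.get(f"{BASE}/v1/batches", params={"limit": 20, "after": last_id})
```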
- required: true - schema: - type: integer - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit post: responses: '200': @@ -61,28 +64,27 @@ paths: $ref: '#/components/schemas/Batch' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: >- - Create a new batch for processing multiple API requests. - description: >- - Create a new batch for processing multiple API requests. - parameters: [] + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests. + operationId: create_batch_v1_batches_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateBatchRequest' - required: true - deprecated: false /v1/batches/{batch_id}: get: responses: @@ -93,29 +95,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: >- - Retrieve information about a specific batch. - description: >- - Retrieve information about a specific batch. + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the batch to retrieve. - required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/batches/{batch_id}/cancel: post: responses: @@ -126,27 +128,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: Cancel a batch that is in progress. + - Batches + summary: Cancel Batch description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the batch to cancel. 
- required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -158,48 +162,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -207,35 +219,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. - description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. 
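The POST operation above is the OpenAI-compatible chat completion endpoint; note that the regenerated spec now serves streamed chunks as `text/event-stream` rather than a `oneOf` on the JSON response. A non-streaming sketch follows, assuming the standard OpenAI request and response envelope (`model`, `messages`, `choices`) and a local server; the model id is a placeholder.

```python
import requests

# Sketch: non-streaming call to POST /v1/chat/completions.
# The base URL, model id, and response envelope are assumptions based on
# the endpoint being documented as OpenAI-compatible.
BASE = "http://localhost:8321"

resp = requests.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "meta-llama/Llama-3.2-1B-Instruct",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```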
+ operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -246,30 +259,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. - required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -280,31 +295,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -315,30 +330,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. - description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. 
- parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -349,30 +365,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -382,36 +400,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -421,30 +441,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. Delete a conversation with the given ID. 
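Taken together, the conversation operations above form a simple lifecycle: create, update metadata, delete. A sketch follows; the request bodies are assumptions, since `CreateConversationRequest` and `UpdateConversationRequest` are referenced but not expanded in this hunk (the update description does state that it modifies a conversation's metadata).

```python
import requests

# Sketch of the conversation lifecycle documented above. The create body
# and the `id` field on the returned resource are assumptions; `metadata`
# follows the update operation's description.
BASE = "http://localhost:8321"

conv = requests.post(f"{BASE}/v1/conversations", json={}).json()
conv_id = conv["id"]  # assuming the resource carries an `id` field

# Update is a POST on the conversation resource, per the paths above.
requests.post(
    f"{BASE}/v1/conversations/{conv_id}",
    json={"metadata": {"topic": "demo"}},
)

requests.delete(f"{BASE}/v1/conversations/{conv_id}")
```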
+ operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -456,73 +478,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. + operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: false - schema: - type: array + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: - type: string - enum: - - web_search_call.action.sources - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - title: ConversationItemInclude - description: >- - Specify additional output data to include in the model response. - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). 
- required: false - schema: - type: string - enum: - - asc - - desc - deprecated: false + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -533,35 +550,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -570,38 +589,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -611,365 +632,352 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. 
- description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. 
- required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. - parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. 
+ operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. 
content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. - parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. + operationId: list_routes_v1_inspect_routes_get parameters: - - name: api_filter - in: query - description: >- - Optional filter to control which routes are returned. Can be an API level - ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, - or 'deprecated' to show deprecated routes across all levels. If not specified, - returns all non-deprecated routes. - required: false - schema: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated type: string - enum: - - v1 - - v1alpha - - v1beta - - deprecated - deprecated: false + - type: 'null' + title: Api Filter /v1/models: get: responses: @@ -980,21 +988,22 @@ paths: schema: $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List models using the OpenAI API. + - Models + summary: Openai List Models description: List models using the OpenAI API. 
- parameters: [] - deprecated: false + operationId: openai_list_models_v1_models_get /v1/models/{model_id}: get: responses: @@ -1005,30 +1014,32 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Get model. - description: >- + - Models + summary: Get Model + description: |- Get model. Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: model_id - in: path - description: The identifier of the model to get. - required: true - schema: - type: string - deprecated: false + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' /v1/moderations: post: responses: @@ -1039,56 +1050,57 @@ paths: schema: $ref: '#/components/schemas/ModerationObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Create moderation. - description: >- + - Safety + summary: Run Moderation + description: |- Create moderation. Classifies if text and/or image inputs are potentially harmful. - parameters: [] + operationId: run_moderation_v1_moderations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunModerationRequest' required: true - deprecated: false /v1/prompts: get: responses: '200': - description: >- - A ListPromptsResponse containing all prompts. + description: A ListPromptsResponse containing all prompts. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List all prompts. + - Prompts + summary: List Prompts description: List all prompts. 
- parameters: [] - deprecated: false + operationId: list_prompts_v1_prompts_get post: responses: '200': @@ -1098,30 +1110,31 @@ paths: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Create prompt. - description: >- + - Prompts + summary: Create Prompt + description: |- Create prompt. Create a new prompt. - parameters: [] + operationId: create_prompt_v1_prompts_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreatePromptRequest' required: true - deprecated: false /v1/prompts/{prompt_id}: get: responses: @@ -1133,246 +1146,254 @@ paths: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Get prompt. - description: >- + - Prompts + summary: Get Prompt + description: |- Get prompt. Get a prompt by its identifier and optional version. + operationId: get_prompt_v1_prompts__prompt_id__get parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to get. - required: true - schema: - type: string - - name: version - in: query - description: >- - The version of the prompt to get (defaults to latest). - required: false - schema: - type: integer - deprecated: false + - name: version + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Version + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' post: responses: '200': - description: >- - The updated Prompt resource with incremented version. + description: The updated Prompt resource with incremented version. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Update prompt. - description: >- + - Prompts + summary: Update Prompt + description: |- Update prompt. Update an existing prompt (increments version). + operationId: update_prompt_v1_prompts__prompt_id__post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to update. 
- required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/UpdatePromptRequest' - required: true - deprecated: false delete: responses: - '200': - description: OK '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response + '204': + description: Successful Response tags: - - Prompts - summary: Delete prompt. - description: >- + - Prompts + summary: Delete Prompt + description: |- Delete prompt. Delete a prompt. + operationId: delete_prompt_v1_prompts__prompt_id__delete parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to delete. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/prompts/{prompt_id}/set-default-version: post: responses: '200': - description: >- - The prompt with the specified version now set as default. + description: The prompt with the specified version now set as default. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Set prompt version. - description: >- + - Prompts + summary: Set Default Version + description: |- Set prompt version. Set which version of a prompt should be the default in get_prompt (latest). + operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/SetDefaultVersionRequest' required: true - deprecated: false /v1/prompts/{prompt_id}/versions: get: responses: '200': - description: >- - A ListPromptsResponse containing all versions of the prompt. + description: A ListPromptsResponse containing all versions of the prompt. 
content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List prompt versions. - description: >- + - Prompts + summary: List Prompt Versions + description: |- List prompt versions. List all versions of a specific prompt. + operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get parameters: - - name: prompt_id - in: path - description: >- - The identifier of the prompt to list versions for. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/providers: get: responses: '200': - description: >- - A ListProvidersResponse containing information about all providers. + description: A ListProvidersResponse containing information about all providers. content: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: List providers. - description: >- + - Providers + summary: List Providers + description: |- List providers. List all available providers. - parameters: [] - deprecated: false + operationId: list_providers_v1_providers_get /v1/providers/{provider_id}: get: responses: '200': - description: >- - A ProviderInfo object containing the provider's details. + description: A ProviderInfo object containing the provider's details. content: application/json: schema: $ref: '#/components/schemas/ProviderInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: Get provider. - description: >- + - Providers + summary: Inspect Provider + description: |- Get provider. Get detailed information about a specific provider. + operationId: inspect_provider_v1_providers__provider_id__get parameters: - - name: provider_id - in: path - description: The ID of the provider to inspect. 
- required: true - schema: - type: string - deprecated: false + - name: provider_id + in: path + required: true + schema: + type: string + description: 'Path parameter: provider_id' /v1/responses: get: responses: @@ -1384,45 +1405,56 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List all responses. + - Agents + summary: List Openai Responses description: List all responses. + operationId: list_openai_responses_v1_responses_get parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 50 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -1436,38 +1468,51 @@ paths: $ref: '#/components/schemas/OpenAIResponseObjectStream' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: Create a model response. + - Agents + summary: Create Openai Response description: Create a model response. - parameters: [] + operationId: create_openai_response_v1_responses_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateOpenaiResponseRequest' - required: true - deprecated: false - x-llama-stack-extra-body-params: - - name: guardrails - schema: - type: array - items: - oneOf: + x-llama-stack-extra-body-params: + guardrails: + $defs: + ResponseGuardrailSpec: + description: |- + Specification for a guardrail to apply during response generation. + + :param type: The type/identifier of the guardrail. 
+ properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + anyOf: + - items: + anyOf: - type: string - $ref: '#/components/schemas/ResponseGuardrailSpec' - description: >- - List of guardrails to apply during response generation. Guardrails provide - safety and content moderation. - required: false + type: array + - type: 'null' + description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation. /v1/responses/{response_id}: get: responses: @@ -1478,28 +1523,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Get a model response. + - Agents + summary: Get Openai Response description: Get a model response. + operationId: get_openai_response_v1_responses__response_id__get parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' delete: responses: '200': @@ -1509,27 +1555,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIDeleteResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Delete a response. + - Agents + summary: Delete Openai Response description: Delete a response. + operationId: delete_openai_response_v1_responses__response_id__delete parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' /v1/responses/{response_id}/input_items: get: responses: @@ -1541,65 +1589,72 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseInputItem' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List input items. + - Agents + summary: List Openai Response Input Items description: List input items. 
+ operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. - required: false - schema: - type: array + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - type: 'null' + title: Include /v1/safety/run-shield: post: responses: @@ -1610,30 +1665,31 @@ paths: schema: $ref: '#/components/schemas/RunShieldResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Run shield. - description: >- + - Safety + summary: Run Shield + description: |- Run shield. Run a shield. - parameters: [] + operationId: run_shield_v1_safety_run_shield_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunShieldRequest' required: true - deprecated: false /v1/scoring-functions: get: responses: @@ -1644,21 +1700,22 @@ paths: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: List all scoring functions. + - Scoring Functions + summary: List Scoring Functions description: List all scoring functions. 
- parameters: [] - deprecated: false + operationId: list_scoring_functions_v1_scoring_functions_get /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1669,59 +1726,61 @@ paths: schema: $ref: '#/components/schemas/ScoringFn' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Get a scoring function by its ID. + - Scoring Functions + summary: Get Scoring Function description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get parameters: - - name: scoring_fn_id - in: path - description: The ID of the scoring function to get. - required: true - schema: - type: string - deprecated: false + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' /v1/scoring/score: post: responses: '200': - description: >- - A ScoreResponse object containing rows and aggregated results. + description: A ScoreResponse object containing rows and aggregated results. content: application/json: schema: $ref: '#/components/schemas/ScoreResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a list of rows. + - Scoring + summary: Score description: Score a list of rows. - parameters: [] + operationId: score_v1_scoring_score_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreRequest' required: true - deprecated: false /v1/scoring/score-batch: post: responses: @@ -1732,27 +1791,28 @@ paths: schema: $ref: '#/components/schemas/ScoreBatchResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a batch of rows. + - Scoring + summary: Score Batch description: Score a batch of rows. 
- parameters: [] + operationId: score_batch_v1_scoring_score_batch_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreBatchRequest' required: true - deprecated: false /v1/shields: get: responses: @@ -1763,21 +1823,22 @@ paths: schema: $ref: '#/components/schemas/ListShieldsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: List all shields. + - Shields + summary: List Shields description: List all shields. - parameters: [] - deprecated: false + operationId: list_shields_v1_shields_get /v1/shields/{identifier}: get: responses: @@ -1788,27 +1849,29 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Get a shield by its identifier. + - Shields + summary: Get Shield description: Get a shield by its identifier. + operationId: get_shield_v1_shields__identifier__get parameters: - - name: identifier - in: path - description: The identifier of the shield to get. - required: true - schema: - type: string - deprecated: false + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' /v1/tool-runtime/invoke: post: responses: @@ -1819,27 +1882,28 @@ paths: schema: $ref: '#/components/schemas/ToolInvocationResult' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolRuntime - summary: Run a tool with the given arguments. + - Tool Runtime + summary: Invoke Tool description: Run a tool with the given arguments. 
- parameters: [] + operationId: invoke_tool_v1_tool_runtime_invoke_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true - deprecated: false /v1/tool-runtime/list-tools: get: responses: @@ -1851,41 +1915,46 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolRuntime - summary: List all tools in the runtime. + - Tool Runtime + summary: List Runtime Tools description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get parameters: - - name: tool_group_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - - name: mcp_endpoint - in: query - description: >- - The MCP endpoint to use for the tool group. - required: false - schema: - $ref: '#/components/schemas/URL' - - name: authorization - in: query - description: >- - (Optional) OAuth access token for authenticating with the MCP server. - required: false - schema: - type: string - deprecated: false + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint /v1/toolgroups: get: responses: @@ -1896,21 +1965,22 @@ paths: schema: $ref: '#/components/schemas/ListToolGroupsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: List tool groups with optional provider. + - Tool Groups + summary: List Tool Groups description: List tool groups with optional provider. - parameters: [] - deprecated: false + operationId: list_tool_groups_v1_toolgroups_get /v1/toolgroups/{toolgroup_id}: get: responses: @@ -1921,27 +1991,29 @@ paths: schema: $ref: '#/components/schemas/ToolGroup' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool group by its ID. + - Tool Groups + summary: Get Tool Group description: Get a tool group by its ID. 
+ operationId: get_tool_group_v1_toolgroups__toolgroup_id__get parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to get. - required: true - schema: - type: string - deprecated: false + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' /v1/tools: get: responses: @@ -1953,27 +2025,30 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolGroups - summary: List tools with optional tool group. + - Tool Groups + summary: List Tools description: List tools with optional tool group. + operationId: list_tools_v1_tools_get parameters: - - name: toolgroup_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - deprecated: false + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id /v1/tools/{tool_name}: get: responses: @@ -1984,54 +2059,57 @@ paths: schema: $ref: '#/components/schemas/ToolDef' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool by its name. + - Tool Groups + summary: Get Tool description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get parameters: - - name: tool_name - in: path - description: The name of the tool to get. - required: true - schema: - type: string - deprecated: false + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' /v1/vector-io/insert: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - VectorIO - summary: Insert chunks into a vector database. + - Vector Io + summary: Insert Chunks description: Insert chunks into a vector database. 
- parameters: [] + operationId: insert_chunks_v1_vector_io_insert_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InsertChunksRequest' required: true - deprecated: false /v1/vector-io/query: post: responses: @@ -2042,2227 +2120,2043 @@ paths: schema: $ref: '#/components/schemas/QueryChunksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Query chunks from a vector database. + - Vector Io + summary: Query Chunks description: Query chunks from a vector database. - parameters: [] + operationId: query_chunks_v1_vector_io_query_post requestBody: content: application/json: schema: $ref: '#/components/schemas/QueryChunksRequest' required: true - deprecated: false /v1/vector_stores: get: responses: '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. + description: A VectorStoreListResponse containing the list of vector stores. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Returns a list of vector stores. + - Vector Io + summary: Openai List Vector Stores description: Returns a list of vector stores. + operationId: openai_list_vector_stores_v1_vector_stores_get parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order post: responses: '200': - description: >- - A VectorStoreObject representing the created vector store. 
+ description: A VectorStoreObject representing the created vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Creates a vector store. - description: >- + - Vector Io + summary: Openai Create Vector Store + description: |- Creates a vector store. Generate an OpenAI-compatible vector store with the given parameters. - parameters: [] + operationId: openai_create_vector_store_v1_vector_stores_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}: get: responses: '200': - description: >- - A VectorStoreObject representing the vector store. + description: A VectorStoreObject representing the vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store. + - Vector Io + summary: Openai Retrieve Vector Store description: Retrieves a vector store. + operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreObject representing the updated vector store. + description: A VectorStoreObject representing the updated vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store. + - Vector Io + summary: Openai Update Vector Store description: Updates a vector store. + operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. 
- required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. + description: A VectorStoreDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store. + - Vector Io + summary: Openai Delete Vector Store description: Delete a vector store. + operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' /v1/vector_stores/{vector_store_id}/file_batches: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the created file batch. + description: A VectorStoreFileBatchObject representing the created file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Create a vector store file batch. - description: >- + - Vector Io + summary: Openai Create Vector Store File Batch + description: |- Create a vector store file batch. - Generate an OpenAI-compatible vector store file batch for the given vector - store. + Generate an OpenAI-compatible vector store file batch for the given vector store. + operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true - deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the file batch. + description: A VectorStoreFileBatchObject representing the file batch. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieve a vector store file batch. + - Vector Io + summary: Openai Retrieve Vector Store File Batch description: Retrieve a vector store file batch. + operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the file batch to retrieve. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the cancelled file batch. + description: A VectorStoreFileBatchObject representing the cancelled file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Cancels a vector store file batch. + - Vector Io + summary: Openai Cancel Vector Store File Batch description: Cancels a vector store file batch. + operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the file batch to cancel. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: get: responses: '200': - description: >- - A VectorStoreFilesListInBatchResponse containing the list of files in - the batch. + description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Returns a list of vector store files in a batch. - description: >- - Returns a list of vector store files in a batch. + - Vector Io + summary: Openai List Files In Vector Store File Batch + description: Returns a list of vector store files in a batch. + operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get parameters: - - name: batch_id - in: path - description: >- - The ID of the file batch to list files from. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - Filter by file status. One of in_progress, completed, failed, cancelled. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Filter + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/files: get: responses: '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. + description: A VectorStoreListFilesResponse containing the list of files. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreListFilesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: List files in a vector store. + - Vector Io + summary: Openai List Files In Vector Store description: List files in a vector store. + operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. - required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + title: Filter + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + nullable: true + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the attached file. + description: A VectorStoreFileObject representing the attached file. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Attach a file to a vector store. + - Vector Io + summary: Openai Attach File To Vector Store description: Attach a file to a vector store. + operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: '200': - description: >- - A VectorStoreFileObject representing the file. + description: A VectorStoreFileObject representing the file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - File contents, optionally with embeddings and metadata based on query - parameters. + description: File contents, optionally with embeddings and metadata based on query parameters. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - - name: include_embeddings - in: query - description: >- - Whether to include embedding vectors in the response. - required: false - schema: - $ref: '#/components/schemas/bool' - - name: include_metadata - in: query - description: >- - Whether to include chunk metadata in the response. - required: false - schema: - $ref: '#/components/schemas/bool' - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. + Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. 
- required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. - parameters: [] - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema + operationId: version_v1_version_get components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. 
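For orientation, here is a minimal client-side sketch of the file-content and search endpoints specified above. It assumes a Llama Stack server at `localhost:8321`; the store/file IDs are placeholders, and the search body is assumed to be a minimal `OpenaiSearchVectorStoreRequest` carrying only a query.

```python
# Illustrative sketch only -- not part of the spec. Assumes a Llama Stack
# server at localhost:8321; the IDs below are placeholders.
import requests

BASE = "http://localhost:8321/v1"
store_id, file_id = "vs_123", "file_456"  # hypothetical IDs

# GET /v1/vector_stores/{vector_store_id}/files/{file_id}/content
# include_embeddings / include_metadata are the optional query params above.
resp = requests.get(
    f"{BASE}/vector_stores/{store_id}/files/{file_id}/content",
    params={"include_metadata": "true"},
)
resp.raise_for_status()
content = resp.json()  # a VectorStoreFileContentResponse

# POST /v1/vector_stores/{vector_store_id}/search
# Assumed minimal request body: a query string.
resp = requests.post(
    f"{BASE}/vector_stores/{store_id}/search",
    json={"query": "what does the report conclude?"},
)
resp.raise_for_status()
page = resp.json()  # a VectorStoreSearchResponsePage
```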
- ListBatchesResponse: type: object + ListBatchesResponse: properties: object: type: string const: list + title: Object default: list data: - type: array items: - type: object - properties: - id: - type: string - completion_window: - type: string - created_at: - type: integer - endpoint: - type: string - input_file_id: - type: string - object: - type: string - const: batch - status: - type: string - enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled - cancelled_at: - type: integer - cancelling_at: - type: integer - completed_at: - type: integer - error_file_id: - type: string - errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false - title: Errors - expired_at: - type: integer - expires_at: - type: integer - failed_at: - type: integer - finalizing_at: - type: integer - in_progress_at: - type: integer - metadata: - type: object - additionalProperties: - type: string - model: - type: string - output_file_id: - type: string - request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total - title: BatchRequestCounts - usage: - type: object - properties: - input_tokens: - type: integer - input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens - title: BatchUsage - additionalProperties: false - required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status - title: Batch + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list has_more: type: boolean + title: Has More + description: Whether there are more batches available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ListBatchesResponse - description: >- - Response containing a list of batch objects. - CreateBatchRequest: type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. + CreateBatchRequest: properties: input_file_id: type: string - description: >- - The ID of an uploaded file containing requests for the batch. + title: Input File Id endpoint: type: string - description: >- - The endpoint to be used for all requests in the batch. + title: Endpoint completion_window: type: string const: 24h - description: >- - The time window within which the batch should be processed. 
+ title: Completion Window metadata: - type: object - additionalProperties: - type: string - description: Optional metadata for the batch. + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' idempotency_key: - type: string - description: >- - Optional idempotency key. When provided, enables idempotent behavior. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input_file_id - - endpoint - - completion_window + - input_file_id + - endpoint + - completion_window title: CreateBatchRequest Batch: - type: object properties: id: type: string + title: Id completion_window: type: string + title: Completion Window created_at: type: integer + title: Created At endpoint: type: string + title: Endpoint input_file_id: type: string + title: Input File Id object: type: string const: batch + title: Object status: type: string enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status cancelled_at: - type: integer + anyOf: + - type: integer + - type: 'null' cancelling_at: - type: integer + anyOf: + - type: integer + - type: 'null' completed_at: - type: integer + anyOf: + - type: integer + - type: 'null' error_file_id: - type: string + anyOf: + - type: string + - type: 'null' errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' title: Errors expired_at: - type: integer + anyOf: + - type: integer + - type: 'null' expires_at: - type: integer + anyOf: + - type: integer + - type: 'null' failed_at: - type: integer + anyOf: + - type: integer + - type: 'null' finalizing_at: - type: integer + anyOf: + - type: integer + - type: 'null' in_progress_at: - type: integer + anyOf: + - type: integer + - type: 'null' metadata: - type: object - additionalProperties: - type: string + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' model: - type: string + anyOf: + - type: string + - type: 'null' output_file_id: - type: string + anyOf: + - type: string + - type: 'null' request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' title: BatchRequestCounts usage: - type: object - properties: - input_tokens: - type: integer - input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' 
title: BatchUsage - additionalProperties: false + additionalProperties: true + type: object required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status title: Batch Order: type: string enum: - - asc - - desc + - asc + - desc title: Order description: Sort order for paginated responses. ListOpenAIChatCompletionResponse: - type: object properties: data: - type: array items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more completions available beyond this list + title: Has More first_id: type: string - description: ID of the first completion in this list + title: First Id last_id: type: string - description: ID of the last completion in this list + title: Last Id object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. 
+ content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. 
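As a reading aid for the discriminated union above, a hedged sketch of content parts as plain dicts; the `type` field selects the variant, and the image URL is a placeholder.

```python
# Plain-dict content parts matching the discriminator above ("type" selects
# text / image_url / file). The image URL is a placeholder.
text_part = {"type": "text", "text": "Describe this image."}
image_part = {
    "type": "image_url",
    "image_url": {"url": "https://example.com/cat.png", "detail": "auto"},
}
user_message = {"role": "user", "content": [text_part, image_part]}
```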
+ OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. 
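A hedged example of the tool-call and usage shapes just specified; the function name and all values are invented for illustration.

```python
# Shapes matching OpenAIChatCompletionToolCall and OpenAIChatCompletionUsage
# above. Function name and values are invented.
import json

tool_call = {
    "id": "call_abc123",  # optional per the schema
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "arguments": json.dumps({"city": "Paris"}),  # arguments travel as a JSON string
    },
}
assistant_message = {"role": "assistant", "tool_calls": [tool_call]}

usage = {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46}
```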
- OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
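A small helper sketch for walking the `OpenAIChoice` / `OpenAIChoiceLogprobs` nesting defined above; `response` is assumed to be a parsed chat-completion body.

```python
# Sketch: extract the first token's log probability from a parsed response,
# following the OpenAIChoice -> OpenAIChoiceLogprobs -> OpenAITokenLogProb
# nesting above. Returns None when logprobs were not requested.
def first_token_logprob(response: dict) -> float | None:
    choice = response["choices"][0]    # an OpenAIChoice
    logprobs = choice.get("logprobs")  # OpenAIChoiceLogprobs or None
    if not logprobs or not logprobs.get("content"):
        return None
    return logprobs["content"][0]["logprob"]  # an OpenAITokenLogProb field
```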
- OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
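Dict shapes for `OpenAIDeveloperMessageParam` and the `OpenAIFile` content part above, as a sketch; the file id is a placeholder, and only the needed field is set since `file_data`, `file_id`, and `filename` are each optional.

```python
# Placeholder values; file_id references a previously uploaded file.
developer_message = {"role": "developer", "content": "Always answer in French."}
file_part = {"type": "file", "file": {"file_id": "file-789"}}
```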
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. 
- additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. 
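A `response_format` payload matching `OpenAIResponseFormatJSONSchema` above; the schema itself is a made-up example, and `strict` is optional per the spec.

```python
# Structured-output request fragment; the city_info schema is invented.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "city_info",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "population": {"type": "integer"},
            },
            "required": ["city", "population"],
        },
    },
}
```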
- OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. - logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. 
- response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
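+  # Illustrative example (not part of the generated schema): a minimal
+  # request body satisfying OpenAIChatCompletionRequestWithExtraBody.
+  # Only model and messages are required; the model id below is a
+  # placeholder, not a value defined by this spec.
+  #   model: llama3.2:3b
+  #   messages:
+  #   - role: user
+  #     content: Hello, what can you do?
+  #   stream: false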
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
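+  # Illustrative example (not part of the generated schema): one streamed
+  # OpenAIChatCompletionChunk carrying a single OpenAIChunkChoice. All
+  # values are invented placeholders; finish_reason is shown because this
+  # schema marks it required on every chunk.
+  #   id: chatcmpl-123
+  #   object: chat.completion.chunk
+  #   created: 1700000000
+  #   model: llama3.2:3b
+  #   choices:
+  #   - index: 0
+  #     delta:
+  #       content: "Hel"
+  #     finish_reason: stop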
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -4270,5350 +4164,7945 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
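+  # Illustrative example (not part of the generated schema): a
+  # file_citation annotation instance; the file id and filename are
+  # invented placeholders.
+  #   type: file_citation
+  #   file_id: file-abc123
+  #   filename: notes.txt
+  #   index: 42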
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' - OpenAIResponseInputMessageContentFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: properties: type: type: string const: input_file + title: Type default: input_file - description: >- - The type of the input item. Always `input_file`. file_data: - type: string - description: >- - The data of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_url: - type: string - description: >- - The URL of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' filename: - type: string - description: >- - The name of the file to be sent to the model. - additionalProperties: false - required: - - type - title: OpenAIResponseInputMessageContentFile - description: >- - File content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. 
+ anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. - OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
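+  # Illustrative example (not part of the generated schema): a
+  # function_call item as it would appear in a response; call_id, name,
+  # and arguments are invented placeholders.
+  #   type: function_call
+  #   call_id: call_abc123
+  #   name: get_weather
+  #   arguments: '{"city": "Paris"}'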
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
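+  # Illustrative example (not part of the generated schema): an
+  # mcp_list_tools item; the server label and tool definition are
+  # invented placeholders. Each tool requires name and input_schema.
+  #   type: mcp_list_tools
+  #   id: mcpl_001
+  #   server_label: docs-server
+  #   tools:
+  #   - name: search_docs
+  #     input_schema:
+  #       type: object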
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
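+  # Illustrative example (not part of the generated schema): the deletion
+  # response for a conversation; the id is an invented placeholder, and
+  # object and deleted take their schema defaults.
+  #   id: conv_abc123
+  #   object: conversation.deleted
+  #   deleted: true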
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
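The Files schemas in this hunk likewise track the OpenAI Files API, so the stock client can drive upload and deletion. A sketch under the same assumptions as above (local base URL, placeholder key, hypothetical file name); note the new `ExpiresAfter` bounds enforced by the regenerated schema:

```python
# Sketch: OpenAIFileObject / OpenAIFileDeleteResponse via the standard client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed endpoint

# `purpose` must be an OpenAIFilePurpose value: "assistants" | "batch".
uploaded = client.files.create(file=open("notes.txt", "rb"), purpose="assistants")
print(uploaded.id, uploaded.bytes, uploaded.expires_at)  # OpenAIFileObject fields

# Per ExpiresAfter, an expiration policy must use anchor="created_at" with
# seconds in [3600, 2592000] (1 hour to 30 days) -- now encoded as schema
# minimum/maximum rather than prose.

# Deletion returns OpenAIFileDeleteResponse: {id, object: "file", deleted}.
result = client.files.delete(uploaded.id)
print(result.deleted)
```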
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - OpenAIModel: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: properties: id: type: string + title: Id object: type: string const: model + title: Object default: model created: type: integer + title: Created owned_by: type: string + title: Owned By custom_metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: OpenAIListModelsResponse Model: - type: object properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding - - rerank + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - (Optional) The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. 
+ title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. 
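The moderation schemas above also mirror the OpenAI moderations endpoint, so the same client pattern applies. A sketch with the usual assumed local endpoint; note that in the regenerated `ModerationObjectResults` only `flagged` remains required, with the category maps now nullable:

```python
# Sketch: RunModerationRequest / ModerationObject via the standard client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed endpoint

# RunModerationRequest: `input` is string | list[string]; `model` is optional.
mod = client.moderations.create(input="some text to classify")

# ModerationObject -> results: list[ModerationObjectResults]
for r in mod.results:
    print(r.flagged)           # the only required field in the new schema
    print(r.categories)        # per-category booleans (nullable here)
    print(r.category_scores)   # per-category scores (nullable here)
```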
+ ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. - additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. 
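For the Llama Stack-specific prompt schemas, there is no off-the-shelf client method to point at, so the following is only a rough HTTP sketch. The `/v1/prompts` path, the port, and the `{{document}}` placeholder syntax are all assumptions, not endpoints or conventions confirmed by this patch; only the request bodies come from `CreatePromptRequest` and `UpdatePromptRequest` above.

```python
# Sketch only: driving the prompt schemas over HTTP with assumed paths.
import httpx

base = "http://localhost:8321/v1"  # assumed server address

# CreatePromptRequest: `prompt` is required; `variables` is an optional list of names.
created = httpx.post(f"{base}/prompts", json={
    "prompt": "Summarize {{document}} in one paragraph.",  # placeholder syntax assumed
    "variables": ["document"],
}).json()

# UpdatePromptRequest: `prompt` and `version` are required;
# `set_as_default` now defaults to true in the schema itself.
httpx.put(f"{base}/prompts/{created['prompt_id']}", json={
    "prompt": "Summarize {{document}} in two sentences.",
    "version": created["version"],
})
```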
+ ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. - OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutput' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseMessage' - OpenAIResponseInputToolFileSearch: + title: Message type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false - required: - - type - - name - title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. - OpenAIResponseInputToolWebSearch: + anyOf: + - type: boolean + - type: 'null' type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
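A concrete payload makes the `OpenAIResponseInputToolFunction` shape easier to read than the diff. This is a sketch with a hypothetical function name; the field set comes directly from the schema above, where `parameters` has moved from optional to required (though it may be null):

```python
# Sketch: a tool definition matching OpenAIResponseInputToolFunction.
get_weather_tool = {
    "type": "function",
    "name": "get_weather",                      # hypothetical function name
    "description": "Look up current weather for a city.",
    "parameters": {                             # JSON Schema for the arguments
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
    "strict": True,                             # optional strict validation
}
```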
+ OpenAIResponseInputToolWebSearch: properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 - - type: string - const: web_search_2025_08_26 + title: Type default: web_search - description: Web search tool type variant to use - search_context_size: type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type - title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseObjectWithInput: type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response + anyOf: + - type: integer + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
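Because `output` (and `input`) are now discriminated unions keyed on `type`, consumers should dispatch on that field rather than on concrete classes. A sketch of that pattern with the usual assumed base URL and a hypothetical model id:

```python
# Sketch: dispatching on the `type` discriminator of response output items.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed endpoint
resp = client.responses.create(model="llama3.2:3b", input="hello")    # assumed model id

for item in resp.output:
    if item.type == "message":
        print("assistant message:", item.content)
    elif item.type == "function_call":
        print("tool call:", item.name, item.arguments)
    # other variants: web_search_call, file_search_call, mcp_call,
    # mcp_list_tools, mcp_approval_request
```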
OpenAIResponseOutput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) OpenAIResponsePrompt: - type: object properties: id: type: string - description: Unique identifier of the prompt template + title: Id variables: - type: object - additionalProperties: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - description: >- - Dictionary of variable names to OpenAIResponseInputMessageContent structure - for template substitution. The substitution values can either be strings, - or other Response input types like images or files. 
+ anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' version: - type: string - description: >- - Version number of the prompt to use (defaults to latest if not specified) - additionalProperties: false - required: - - id - title: OpenAIResponsePrompt - description: >- - OpenAI compatible Prompt object that is used in OpenAI responses. - OpenAIResponseText: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. + OpenAIResponseText: properties: format: - type: object - properties: - type: - oneOf: - - type: string - const: text - - type: string - const: json_schema - - type: string - const: json_object - description: >- - Must be "text", "json_schema", or "json_object" to identify the format - type - name: - type: string - description: >- - The name of the response format. Only used for json_schema. - schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. Only used for json_schema. - description: - type: string - description: >- - (Optional) A description of the response format. Only used for json_schema. - strict: - type: boolean - description: >- - (Optional) Whether to strictly enforce the JSON schema. If true, the - response must match the schema exactly. Only used for json_schema. - additionalProperties: false - required: - - type - description: >- - (Optional) Text format configuration specifying output format requirements - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object title: OpenAIResponseText - description: >- - Text response configuration for OpenAI responses. + description: Text response configuration for OpenAI responses. 
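The inline format object removed above is now the referenced `OpenAIResponseTextFormat`, but the field names carry over, so a request-side payload still looks like this sketch (schema contents and the `city_info` name are illustrative only):

```python
# Sketch: an OpenAIResponseText payload requesting JSON-schema output.
text_config = {
    "format": {
        "type": "json_schema",   # "text" | "json_schema" | "json_object"
        "name": "city_info",     # only used for json_schema
        "schema": {              # the JSON Schema the output must conform to
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "population": {"type": "integer"},
            },
            "required": ["city", "population"],
        },
        "strict": True,          # enforce the schema exactly
    }
}
# e.g. client.responses.create(model=..., input=..., text=text_config)
```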
OpenAIResponseTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) OpenAIResponseToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - title: OpenAIResponseToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response object. - OpenAIResponseUsage: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. 
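One point worth illustrating: `OpenAIResponseToolMCP` above is the shape echoed back in response objects, while the request-side `OpenAIResponseInputToolMCP` (further down) additionally carries `server_url` and `require_approval`. A sketch of a request-side MCP tool config; the label, URL, and tool names are hypothetical:

```python
# Sketch: an MCP tool configuration matching OpenAIResponseInputToolMCP.
mcp_tool = {
    "type": "mcp",
    "server_label": "docs",                    # required label for the server
    "server_url": "https://example.com/mcp",   # request-side only (hypothetical URL)
    "require_approval": "never",               # "always" | "never" | ApprovalFilter
    "allowed_tools": ["search", "fetch"],      # list[string] or AllowedToolsFilter
}
```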
+ OpenAIResponseUsage: properties: input_tokens: type: integer - description: Number of tokens in the input + title: Input Tokens output_tokens: type: integer - description: Number of tokens in the output + title: Output Tokens total_tokens: type: integer - description: Total tokens used (input + output) + title: Total Tokens input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - description: Detailed breakdown of input token usage + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - description: Detailed breakdown of output token usage - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object required: - - input_tokens - - output_tokens - - total_tokens + - input_tokens + - output_tokens + - total_tokens title: OpenAIResponseUsage description: Usage information for OpenAI response. ResponseGuardrailSpec: - type: object + description: Specification for a guardrail to apply during response generation. properties: type: + title: Type type: string - description: The type/identifier of the guardrail. - additionalProperties: false required: - - type + - type title: ResponseGuardrailSpec - description: >- - Specification for a guardrail to apply during response generation. + type: object OpenAIResponseInputTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) OpenAIResponseInputToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label server_url: type: string - description: URL endpoint of the MCP server + title: Server Url headers: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) HTTP headers to include when connecting to the server + anyOf: + - additionalProperties: true + type: object + - type: 'null' authorization: - type: string - description: >- - (Optional) OAuth access token for authenticating with the MCP server + anyOf: + - type: string + - type: 'null' require_approval: - oneOf: - - type: string - const: always - - type: string - const: never - - type: object - properties: - always: - type: array - items: - type: string - description: >- - (Optional) List of tool names that always require approval - never: - type: array - items: - type: string - description: >- - (Optional) List of tool names that never require approval - additionalProperties: false - title: ApprovalFilter - description: >- - Filter configuration for MCP tool approval requirements. + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter default: never - description: >- - Approval requirement for tool calls ("always", "never", or filter) allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - - server_url - - require_approval - title: OpenAIResponseInputToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response inputs. - CreateOpenaiResponseRequest: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. + CreateOpenaiResponseRequest: properties: input: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: Input message(s) to create the response. 
+ anyOf: + - type: string + - items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input + type: array + title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] + title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] model: type: string - description: The underlying LLM used for completions. + title: Model prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Prompt object with ID, version, and variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt instructions: - type: string + anyOf: + - type: string + - type: 'null' previous_response_id: - type: string - description: >- - (Optional) if specified, the new response will be a continuation of the - previous response. This can be used to easily fork-off new responses from - existing responses. + anyOf: + - type: string + - type: 'null' conversation: - type: string - description: >- - (Optional) The ID of a conversation to add the response to. Must begin - with 'conv_'. Input and output messages will be automatically added to - the conversation. 
+ anyOf: + - type: string + - type: 'null' store: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: true stream: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: false temperature: - type: number + anyOf: + - type: number + - type: 'null' text: - $ref: '#/components/schemas/OpenAIResponseText' + anyOf: + - $ref: '#/components/schemas/OpenAIResponseText' + title: OpenAIResponseText + - type: 'null' + title: OpenAIResponseText tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputTool' + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' include: - type: array - items: - type: string - description: >- - (Optional) Additional fields to include in the response. + anyOf: + - items: + type: string + type: array + - type: 'null' max_infer_iters: - type: integer + anyOf: + - type: integer + - type: 'null' + default: 10 max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - input - - model + - input + - model title: CreateOpenaiResponseRequest OpenAIResponseObject: - type: object properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) 
+ oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. 
+ anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - title: OpenAIResponseObject - description: >- - Complete OpenAI response object containing generation results and metadata. - OpenAIResponseContentPartOutputText: + anyOf: + - type: integer + - type: 'null' type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. 
properties: type: - type: string const: output_text default: output_text - description: >- - Content part type identifier, always "output_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Text emitted for this content part annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - description: >- - Structured annotations associated with the text + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array logprobs: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) Token log probability details - additionalProperties: false + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true required: - - type - - text - - annotations + - text title: OpenAIResponseContentPartOutputText - description: >- - Text content within a streamed response part. - "OpenAIResponseContentPartReasoningSummary": type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. properties: type: - type: string const: summary_text default: summary_text - description: >- - Content part type identifier, always "summary_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Summary text - additionalProperties: false required: - - type - - text - title: >- - OpenAIResponseContentPartReasoningSummary - description: >- - Reasoning summary part in a streamed response. - OpenAIResponseContentPartReasoningText: + - text + title: OpenAIResponseContentPartReasoningSummary type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. properties: type: - type: string const: reasoning_text default: reasoning_text - description: >- - Content part type identifier, always "reasoning_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Reasoning text supplied by the model - additionalProperties: false required: - - type - - text + - text title: OpenAIResponseContentPartReasoningText - description: >- - Reasoning text emitted as part of a streamed response. 
+ type: object OpenAIResponseObjectStream: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: - propertyName: type mapping: - response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - response.output_item.done: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.completed: 
'#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' - "OpenAIResponseObjectStreamResponseCompleted": - type: object + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: 
OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Completed response object type: - type: string const: response.completed default: response.completed - description: >- - Event type identifier, always "response.completed" - additionalProperties: false + title: Type + type: string required: - - response - - type - title: >- - OpenAIResponseObjectStreamResponseCompleted - description: >- - Streaming event indicating a response has been completed. - "OpenAIResponseObjectStreamResponseContentPartAdded": + - response + title: OpenAIResponseObjectStreamResponseCompleted type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the part within the content array response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this content item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item containing this content part output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response part: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' - - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' - refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' - description: The content part that was added + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.content_part.added default: response.content_part.added - description: >- - Event type identifier, always "response.content_part.added" - additionalProperties: false + title: Type + type: string required: - - content_index - - response_id - - item_id - - output_index - - part - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseContentPartAdded - description: >- - Streaming event for when a new content part is added to a response item. - "OpenAIResponseObjectStreamResponseContentPartDone": + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartAdded type: object + OpenAIResponseObjectStreamResponseContentPartDone: + description: Streaming event for when a content part is completed. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the part within the content array response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this content item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item containing this content part output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response part: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' - - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' - refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' - description: The completed content part + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.content_part.done default: response.content_part.done - description: >- - Event type identifier, always "response.content_part.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - response_id - - item_id - - output_index - - part - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseContentPartDone - description: >- - Streaming event for when a content part is completed. - "OpenAIResponseObjectStreamResponseCreated": + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartDone type: object + OpenAIResponseObjectStreamResponseCreated: + description: Streaming event indicating a new response has been created. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The response object that was created type: - type: string const: response.created default: response.created - description: >- - Event type identifier, always "response.created" - additionalProperties: false + title: Type + type: string required: - - response - - type - title: >- - OpenAIResponseObjectStreamResponseCreated - description: >- - Streaming event indicating a new response has been created. - OpenAIResponseObjectStreamResponseFailed: + - response + title: OpenAIResponseObjectStreamResponseCreated type: object + OpenAIResponseObjectStreamResponseFailed: + description: Streaming event emitted when a response fails. 
properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Response object describing the failure sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.failed default: response.failed - description: >- - Event type identifier, always "response.failed" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type + - response + - sequence_number title: OpenAIResponseObjectStreamResponseFailed - description: >- - Streaming event emitted when a response fails. - "OpenAIResponseObjectStreamResponseFileSearchCallCompleted": type: object + OpenAIResponseObjectStreamResponseFileSearchCallCompleted: + description: Streaming event for completed file search calls. properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed file search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.file_search_call.completed default: response.file_search_call.completed - description: >- - Event type identifier, always "response.file_search_call.completed" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseFileSearchCallCompleted - description: >- - Streaming event for completed file search calls. - "OpenAIResponseObjectStreamResponseFileSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseFileSearchCallInProgress: + description: Streaming event for file search calls in progress. properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the file search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.file_search_call.in_progress default: response.file_search_call.in_progress - description: >- - Event type identifier, always "response.file_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseFileSearchCallInProgress - description: >- - Streaming event for file search calls in progress. - "OpenAIResponseObjectStreamResponseFileSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseFileSearchCallSearching: + description: Streaming event for file search currently searching. 
properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the file search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.file_search_call.searching default: response.file_search_call.searching - description: >- - Event type identifier, always "response.file_search_call.searching" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseFileSearchCallSearching - description: >- - Streaming event for file search currently searching. - "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta: + description: Streaming event for incremental function call argument updates. properties: delta: + title: Delta type: string - description: >- - Incremental function call arguments being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the function call being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.function_call_arguments.delta default: response.function_call_arguments.delta - description: >- - Event type identifier, always "response.function_call_arguments.delta" - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta - description: >- - Streaming event for incremental function call argument updates. - "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone: + description: Streaming event for when function call arguments are completed. properties: arguments: + title: Arguments type: string - description: >- - Final complete arguments JSON string for the function call item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed function call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.function_call_arguments.done default: response.function_call_arguments.done - description: >- - Event type identifier, always "response.function_call_arguments.done" - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone - description: >- - Streaming event for when function call arguments are completed. 
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the MCP call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.in_progress default: response.mcp_call.in_progress - description: >- - Event type identifier, always "response.mcp_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallInProgress - description: >- - Streaming event for MCP calls in progress. - "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallInProgress type: object + OpenAIResponseObjectStreamResponseMcpListToolsCompleted: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.completed default: response.mcp_list_tools.completed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsCompleted - "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted type: object + OpenAIResponseObjectStreamResponseMcpListToolsFailed: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.failed default: response.mcp_list_tools.failed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsFailed - "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed type: object + OpenAIResponseObjectStreamResponseMcpListToolsInProgress: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.in_progress default: response.mcp_list_tools.in_progress - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsInProgress - "OpenAIResponseObjectStreamResponseOutputItemAdded": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress type: object + OpenAIResponseObjectStreamResponseOutputItemAdded: + description: Streaming event for when a new output item is added to the response. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The output item that was added (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.added default: response.output_item.added - description: >- - Event type identifier, always "response.output_item.added" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemAdded - description: >- - Streaming event for when a new output item is added to the response. - "OpenAIResponseObjectStreamResponseOutputItemDone": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemAdded type: object + OpenAIResponseObjectStreamResponseOutputItemDone: + description: Streaming event for when an output item is completed. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The completed output item (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.done default: response.output_item.done - description: >- - Event type identifier, always "response.output_item.done" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemDone - description: >- - Streaming event for when an output item is completed. - "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemDone type: object + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded: + description: Streaming event for when an annotation is added to output text. 
properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the item to which the annotation is being added output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response's output array content_index: + title: Content Index type: integer - description: >- - Index position of the content part within the output item annotation_index: + title: Annotation Index type: integer - description: >- - Index of the annotation within the content part annotation: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' - description: The annotation object being added + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.annotation.added default: response.output_text.annotation.added - description: >- - Event type identifier, always "response.output_text.annotation.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - content_index - - annotation_index - - annotation - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded - description: >- - Streaming event for when an annotation is added to output text. - "OpenAIResponseObjectStreamResponseOutputTextDelta": + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded type: object + OpenAIResponseObjectStreamResponseOutputTextDelta: + description: Streaming event for incremental text content updates. 
properties: content_index: + title: Content Index type: integer - description: Index position within the text content delta: + title: Delta type: string - description: Incremental text content being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.delta default: response.output_text.delta - description: >- - Event type identifier, always "response.output_text.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDelta - description: >- - Streaming event for incremental text content updates. - "OpenAIResponseObjectStreamResponseOutputTextDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDelta type: object + OpenAIResponseObjectStreamResponseOutputTextDone: + description: Streaming event for when text output is completed. properties: content_index: + title: Content Index type: integer - description: Index position within the text content text: + title: Text type: string - description: >- - Final complete text content of the output item item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.done default: response.output_text.done - description: >- - Event type identifier, always "response.output_text.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - text - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDone - description: >- - Streaming event for when text output is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded": + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded: + description: Streaming event for when a new reasoning summary part is added. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The summary part that was added sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.added default: response.reasoning_summary_part.added - description: >- - Event type identifier, always "response.reasoning_summary_part.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded - description: >- - Streaming event for when a new reasoning summary part is added. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone: + description: Streaming event for when a reasoning summary part is completed. properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The completed summary part sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.done default: response.reasoning_summary_part.done - description: >- - Event type identifier, always "response.reasoning_summary_part.done" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartDone - description: >- - Streaming event for when a reasoning summary part is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta: + description: Streaming event for incremental reasoning summary text updates. 
properties: delta: + title: Delta type: string - description: Incremental summary text being added item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.delta default: response.reasoning_summary_text.delta - description: >- - Event type identifier, always "response.reasoning_summary_text.delta" - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta - description: >- - Streaming event for incremental reasoning summary text updates. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone": + - delta + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone: + description: Streaming event for when reasoning summary text is completed. properties: text: + title: Text type: string - description: Final complete summary text item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.done default: response.reasoning_summary_text.done - description: >- - Event type identifier, always "response.reasoning_summary_text.done" - additionalProperties: false + title: Type + type: string required: - - text - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDone - description: >- - Streaming event for when reasoning summary text is completed. - "OpenAIResponseObjectStreamResponseReasoningTextDelta": + - text + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone type: object + OpenAIResponseObjectStreamResponseReasoningTextDelta: + description: Streaming event for incremental reasoning text updates. 
     properties:
       content_index:
+        title: Content Index
         type: integer
-        description: >-
-          Index position of the reasoning content part
       delta:
+        title: Delta
         type: string
-        description: Incremental reasoning text being added
       item_id:
+        title: Item Id
         type: string
-        description: >-
-          Unique identifier of the output item being updated
       output_index:
+        title: Output Index
         type: integer
-        description: >-
-          Index position of the item in the output list
       sequence_number:
+        title: Sequence Number
         type: integer
-        description: >-
-          Sequential number for ordering streaming events
       type:
-        type: string
         const: response.reasoning_text.delta
         default: response.reasoning_text.delta
-        description: >-
-          Event type identifier, always "response.reasoning_text.delta"
-      additionalProperties: false
+        title: Type
+        type: string
     required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDelta
-      description: >-
-        Streaming event for incremental reasoning text updates.
-    "OpenAIResponseObjectStreamResponseReasoningTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
     type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
     properties:
       content_index:
+        title: Content Index
         type: integer
-        description: >-
-          Index position of the reasoning content part
       text:
+        title: Text
         type: string
-        description: Final complete reasoning text
       item_id:
+        title: Item Id
         type: string
-        description: >-
-          Unique identifier of the completed output item
       output_index:
+        title: Output Index
         type: integer
-        description: >-
-          Index position of the item in the output list
       sequence_number:
+        title: Sequence Number
         type: integer
-        description: >-
-          Sequential number for ordering streaming events
       type:
-        type: string
         const: response.reasoning_text.done
         default: response.reasoning_text.done
-        description: >-
-          Event type identifier, always "response.reasoning_text.done"
-      additionalProperties: false
+        title: Type
+        type: string
     required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDone
-      description: >-
-        Streaming event for when reasoning text is completed.
-    "OpenAIResponseObjectStreamResponseRefusalDelta":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
     type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
     properties:
       content_index:
+        title: Content Index
         type: integer
-        description: Index position of the content part
       delta:
+        title: Delta
         type: string
-        description: Incremental refusal text being added
       item_id:
+        title: Item Id
         type: string
-        description: Unique identifier of the output item
       output_index:
+        title: Output Index
         type: integer
-        description: >-
-          Index position of the item in the output list
       sequence_number:
+        title: Sequence Number
         type: integer
-        description: >-
-          Sequential number for ordering streaming events
       type:
-        type: string
         const: response.refusal.delta
         default: response.refusal.delta
-        description: >-
-          Event type identifier, always "response.refusal.delta"
-      additionalProperties: false
+        title: Type
+        type: string
     required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDelta
-      description: >-
-        Streaming event for incremental refusal text updates.
-    "OpenAIResponseObjectStreamResponseRefusalDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
     type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
     properties:
       content_index:
+        title: Content Index
         type: integer
-        description: Index position of the content part
       refusal:
+        title: Refusal
         type: string
-        description: Final complete refusal text
       item_id:
+        title: Item Id
         type: string
-        description: Unique identifier of the output item
       output_index:
+        title: Output Index
         type: integer
-        description: >-
-          Index position of the item in the output list
       sequence_number:
+        title: Sequence Number
         type: integer
-        description: >-
-          Sequential number for ordering streaming events
       type:
-        type: string
         const: response.refusal.done
         default: response.refusal.done
-        description: >-
-          Event type identifier, always "response.refusal.done"
-      additionalProperties: false
+        title: Type
+        type: string
     required:
-        - content_index
-        - refusal
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDone
-      description: >-
-        Streaming event for when refusal text is completed.
-    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+      - content_index
+      - refusal
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
     type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
     properties:
       item_id:
+        title: Item Id
         type: string
-        description: >-
-          Unique identifier of the completed web search call
       output_index:
+        title: Output Index
         type: integer
-        description: >-
-          Index position of the item in the output list
       sequence_number:
+        title: Sequence Number
         type: integer
-        description: >-
-          Sequential number for ordering streaming events
       type:
-        type: string
         const: response.web_search_call.completed
         default: response.web_search_call.completed
-        description: >-
-          Event type identifier, always "response.web_search_call.completed"
-      additionalProperties: false
+        title: Type
+        type: string
     required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseWebSearchCallCompleted
-      description: >-
-        Streaming event for completed web search calls.
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+    ListOpenAIResponseInputItem:
     properties:
       data:
-        type: array
         items:
-          $ref: '#/components/schemas/OpenAIResponseInput'
-        description: List of input items
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+            title: OpenAIResponseInputFunctionToolCallOutput
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+            title: OpenAIResponseMCPApprovalResponse
+          - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+            title: OpenAIResponseMessage-Output
+          title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+        type: array
+        title: Data
       object:
         type: string
         const: list
+        title: Object
         default: list
-        description: Object type identifier, always "list"
-      additionalProperties: false
-      required:
-        - data
-        - object
-      title: ListOpenAIResponseInputItem
-      description: >-
-        List container for OpenAI response input items.
-    RunShieldRequest:
     type: object
+      required:
+      - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldRequest:
     properties:
       shield_id:
         type: string
-        description: The identifier of the shield to run.
+        title: Shield Id
       messages:
-        type: array
         items:
-          $ref: '#/components/schemas/OpenAIMessageParam'
-        description: The messages to run the shield on.
-      params:
-        type: object
-        additionalProperties:
           oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-        description: The parameters of the shield.
-      additionalProperties: false
+          - $ref: '#/components/schemas/OpenAIUserMessageParam-Input'
+            title: OpenAIUserMessageParam-Input
+          - $ref: '#/components/schemas/OpenAISystemMessageParam'
+            title: OpenAISystemMessageParam
+          - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+            title: OpenAIAssistantMessageParam-Input
+          - $ref: '#/components/schemas/OpenAIToolMessageParam'
+            title: OpenAIToolMessageParam
+          - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+            title: OpenAIDeveloperMessageParam
+          discriminator:
+            propertyName: role
+            mapping:
+              assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+              developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+              system: '#/components/schemas/OpenAISystemMessageParam'
+              tool: '#/components/schemas/OpenAIToolMessageParam'
+              user: '#/components/schemas/OpenAIUserMessageParam-Input'
+          title: OpenAIUserMessageParam-Input | ... (5 variants)
+        type: array
+        title: Messages
+      params:
+        additionalProperties: true
+        type: object
+        title: Params
+    type: object
     required:
-        - shield_id
-        - messages
-        - params
+      - shield_id
+      - messages
+      - params
     title: RunShieldRequest
   RunShieldResponse:
-      type: object
     properties:
       violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/SafetyViolation'
+            title: SafetyViolation
+          - type: 'null'
+          title: SafetyViolation
+    type: object
     title: RunShieldResponse
     description: Response from running a safety shield.
   SafetyViolation:
-      type: object
     properties:
       violation_level:
         $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
       user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
+          anyOf:
+          - type: string
+          - type: 'null'
       metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
+          title: Metadata
+    type: object
     required:
-        - violation_level
-        - metadata
+      - violation_level
     title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
+      description: Details of a safety violation detected by content moderation.
   ViolationLevel:
     type: string
     enum:
-        - info
-        - warn
-        - error
+      - info
+      - warn
+      - error
     title: ViolationLevel
     description: Severity level of a safety violation.
   AggregationFunctionType:
     type: string
     enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
+      - average
+      - weighted_average
+      - median
+      - categorical_count
+      - accuracy
     title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
+      description: Types of aggregation functions for scoring results.
   ArrayType:
-      type: object
     properties:
       type:
         type: string
         const: array
+          title: Type
         default: array
-          description: Discriminator type. Always "array"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: ArrayType
     description: Parameter type for array values.
   BasicScoringFnParams:
-      type: object
     properties:
       type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
         const: basic
+          title: Type
         default: basic
-          description: >-
-            The type of scoring function parameters, always basic
       aggregation_functions:
-          type: array
         items:
           $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - aggregation_functions
-      title: BasicScoringFnParams
-      description: >-
-        Parameters for basic scoring function configuration.
-    BooleanType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
     type: object
+      title: BasicScoringFnParams
+      description: Parameters for basic scoring function configuration.
+    BooleanType:
     properties:
       type:
         type: string
         const: boolean
+          title: Type
         default: boolean
-          description: Discriminator type. Always "boolean"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: BooleanType
     description: Parameter type for boolean values.
   ChatCompletionInputType:
-      type: object
     properties:
       type:
         type: string
         const: chat_completion_input
+          title: Type
         default: chat_completion_input
-          description: >-
-            Discriminator type. Always "chat_completion_input"
-      additionalProperties: false
-      required:
-        - type
-      title: ChatCompletionInputType
-      description: >-
-        Parameter type for chat completion input.
-    CompletionInputType:
     type: object
+      title: ChatCompletionInputType
+      description: Parameter type for chat completion input.
+    CompletionInputType:
     properties:
       type:
         type: string
         const: completion_input
+          title: Type
         default: completion_input
-          description: >-
-            Discriminator type. Always "completion_input"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: CompletionInputType
     description: Parameter type for completion input.
   JsonType:
-      type: object
     properties:
       type:
         type: string
         const: json
+          title: Type
         default: json
-          description: Discriminator type. Always "json"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: JsonType
     description: Parameter type for JSON values.
   LLMAsJudgeScoringFnParams:
-      type: object
     properties:
       type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
         const: llm_as_judge
+          title: Type
         default: llm_as_judge
-          description: >-
-            The type of scoring function parameters, always llm_as_judge
       judge_model:
         type: string
-          description: >-
-            Identifier of the LLM model to use as a judge for scoring
+          title: Judge Model
       prompt_template:
-          type: string
-          description: >-
-            (Optional) Custom prompt template for the judge model
+          anyOf:
+          - type: string
+          - type: 'null'
       judge_score_regexes:
-          type: array
         items:
           type: string
-          description: >-
-            Regexes to extract the answer from generated response
-        aggregation_functions:
         type: array
+          title: Judge Score Regexes
+          description: Regexes to extract the answer from generated response
+        aggregation_functions:
         items:
           $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - judge_model
-        - judge_score_regexes
-        - aggregation_functions
-      title: LLMAsJudgeScoringFnParams
-      description: >-
-        Parameters for LLM-as-judge scoring function configuration.
-    NumberType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
     type: object
+      required:
+      - judge_model
+      title: LLMAsJudgeScoringFnParams
+      description: Parameters for LLM-as-judge scoring function configuration.
+    NumberType:
     properties:
       type:
         type: string
         const: number
+          title: Type
         default: number
-          description: Discriminator type. Always "number"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: NumberType
     description: Parameter type for numeric values.
   ObjectType:
-      type: object
     properties:
       type:
         type: string
         const: object
+          title: Type
         default: object
-          description: Discriminator type. Always "object"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: ObjectType
     description: Parameter type for object values.
   RegexParserScoringFnParams:
-      type: object
     properties:
       type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
         const: regex_parser
+          title: Type
         default: regex_parser
-          description: >-
-            The type of scoring function parameters, always regex_parser
       parsing_regexes:
-          type: array
         items:
           type: string
-          description: >-
-            Regex to extract the answer from generated response
-        aggregation_functions:
         type: array
+          title: Parsing Regexes
+          description: Regex to extract the answer from generated response
+        aggregation_functions:
         items:
           $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - parsing_regexes
-        - aggregation_functions
-      title: RegexParserScoringFnParams
-      description: >-
-        Parameters for regex parser scoring function configuration.
-    ScoringFn:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
     type: object
+      title: RegexParserScoringFnParams
+      description: Parameters for regex parser scoring function configuration.
+    ScoringFn:
     properties:
       identifier:
         type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
       provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
       provider_id:
         type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
       type:
         type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
         const: scoring_function
+          title: Type
         default: scoring_function
-          description: >-
-            The resource type, always scoring_function
       description:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
       metadata:
+          additionalProperties: true
         type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          title: Metadata
+          description: Any additional metadata for this definition
       return_type:
         oneOf:
-            - $ref: '#/components/schemas/StringType'
-            - $ref: '#/components/schemas/NumberType'
-            - $ref: '#/components/schemas/BooleanType'
-            - $ref: '#/components/schemas/ArrayType'
-            - $ref: '#/components/schemas/ObjectType'
-            - $ref: '#/components/schemas/JsonType'
-            - $ref: '#/components/schemas/UnionType'
-            - $ref: '#/components/schemas/ChatCompletionInputType'
-            - $ref: '#/components/schemas/CompletionInputType'
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+          description: The return type of the deterministic function
         discriminator:
           propertyName: type
           mapping:
-            string: '#/components/schemas/StringType'
-            number: '#/components/schemas/NumberType'
-            boolean: '#/components/schemas/BooleanType'
             array: '#/components/schemas/ArrayType'
-            object: '#/components/schemas/ObjectType'
-            json: '#/components/schemas/JsonType'
-            union: '#/components/schemas/UnionType'
+            boolean: '#/components/schemas/BooleanType'
             chat_completion_input: '#/components/schemas/ChatCompletionInputType'
             completion_input: '#/components/schemas/CompletionInputType'
+            json: '#/components/schemas/JsonType'
+            number: '#/components/schemas/NumberType'
+            object: '#/components/schemas/ObjectType'
+            string: '#/components/schemas/StringType'
+            union: '#/components/schemas/UnionType'
       params:
-          $ref: '#/components/schemas/ScoringFnParams'
-      additionalProperties: false
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+          description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval
+    type: object
     required:
-        - identifier
-        - provider_id
-        - type
-        - metadata
-        - return_type
+      - identifier
+      - provider_id
+      - return_type
     title: ScoringFn
-      description: >-
-        A scoring function resource for evaluating model outputs.
+      description: A scoring function resource for evaluating model outputs.
   ScoringFnParams:
-      oneOf:
-        - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
-        - $ref: '#/components/schemas/RegexParserScoringFnParams'
-        - $ref: '#/components/schemas/BasicScoringFnParams'
     discriminator:
-        propertyName: type
       mapping:
+          basic: '#/components/schemas/BasicScoringFnParams'
         llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
         regex_parser: '#/components/schemas/RegexParserScoringFnParams'
-          basic: '#/components/schemas/BasicScoringFnParams'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        title: LLMAsJudgeScoringFnParams
+      - $ref: '#/components/schemas/RegexParserScoringFnParams'
+        title: RegexParserScoringFnParams
+      - $ref: '#/components/schemas/BasicScoringFnParams'
+        title: BasicScoringFnParams
+      title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
   ScoringFnParamsType:
-      type: string
+      description: Types of scoring function parameter configurations.
     enum:
-        - llm_as_judge
-        - regex_parser
-        - basic
+      - llm_as_judge
+      - regex_parser
+      - basic
     title: ScoringFnParamsType
-      description: >-
-        Types of scoring function parameter configurations.
+      type: string
   StringType:
-      type: object
     properties:
       type:
         type: string
         const: string
+          title: Type
         default: string
-          description: Discriminator type. Always "string"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: StringType
     description: Parameter type for string values.
   UnionType:
-      type: object
     properties:
       type:
         type: string
         const: union
+          title: Type
         default: union
-          description: Discriminator type. Always "union"
-      additionalProperties: false
-      required:
-        - type
+    type: object
     title: UnionType
     description: Parameter type for union values.
   ListScoringFunctionsResponse:
-      type: object
     properties:
       data:
-          type: array
         items:
           $ref: '#/components/schemas/ScoringFn'
-      additionalProperties: false
+          type: array
+          title: Data
+    type: object
     required:
-        - data
+      - data
     title: ListScoringFunctionsResponse
   ScoreRequest:
-      type: object
     properties:
       input_rows:
-          type: array
         items:
+            additionalProperties: true
           type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The rows to score.
+          type: array
+          title: Input Rows
       scoring_functions:
-          type: object
         additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-      additionalProperties: false
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                title: LLMAsJudgeScoringFnParams
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                title: RegexParserScoringFnParams
+              - $ref: '#/components/schemas/BasicScoringFnParams'
+                title: BasicScoringFnParams
+              discriminator:
+                propertyName: type
+                mapping:
+                  basic: '#/components/schemas/BasicScoringFnParams'
+                  llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+              title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+            - type: 'null'
+            title: AdditionalpropertiesUnion
+          type: object
+          title: Scoring Functions
+    type: object
     required:
-        - input_rows
-        - scoring_functions
+      - input_rows
+      - scoring_functions
     title: ScoreRequest
   ScoreResponse:
-      type: object
     properties:
       results:
-          type: object
         additionalProperties:
           $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult.
-      additionalProperties: false
+          type: object
+          title: Results
+    type: object
     required:
-        - results
+      - results
     title: ScoreResponse
     description: The response from scoring.
   ScoringResult:
-      type: object
     properties:
       score_rows:
-          type: array
         items:
+            additionalProperties: true
           type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The scoring result for each row. Each row is a map of column name to value.
+          type: array
+          title: Score Rows
       aggregated_results:
+          additionalProperties: true
         type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Map of metric name to aggregated value
-      additionalProperties: false
+          title: Aggregated Results
+    type: object
     required:
-        - score_rows
-        - aggregated_results
+      - score_rows
+      - aggregated_results
     title: ScoringResult
     description: A scoring result for a single row.
   ScoreBatchRequest:
-      type: object
     properties:
       dataset_id:
         type: string
-          description: The ID of the dataset to score.
+          title: Dataset Id
       scoring_functions:
-          type: object
         additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                title: LLMAsJudgeScoringFnParams
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                title: RegexParserScoringFnParams
+              - $ref: '#/components/schemas/BasicScoringFnParams'
+                title: BasicScoringFnParams
+              discriminator:
+                propertyName: type
+                mapping:
+                  basic: '#/components/schemas/BasicScoringFnParams'
+                  llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+              title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+            - type: 'null'
+            title: AdditionalpropertiesUnion
+          type: object
+          title: Scoring Functions
       save_results_dataset:
         type: boolean
-          description: >-
-            Whether to save the results to a dataset.
-      additionalProperties: false
+          title: Save Results Dataset
+          default: false
+    type: object
     required:
-        - dataset_id
-        - scoring_functions
-        - save_results_dataset
+      - dataset_id
+      - scoring_functions
     title: ScoreBatchRequest
   ScoreBatchResponse:
-      type: object
     properties:
       dataset_id:
-          type: string
-          description: >-
-            (Optional) The identifier of the dataset that was scored
+          anyOf:
+          - type: string
+          - type: 'null'
       results:
-          type: object
         additionalProperties:
           $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreBatchResponse
-      description: >-
-        Response from batch scoring operations on datasets.
-    Shield:
+          type: object
+          title: Results
     type: object
+      required:
+      - results
+      title: ScoreBatchResponse
+      description: Response from batch scoring operations on datasets.
+    Shield:
     properties:
       identifier:
         type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
       provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
       provider_id:
         type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
       type:
         type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
         const: shield
+          title: Type
         default: shield
-          description: The resource type, always shield
       params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Configuration parameters for the shield
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-      title: Shield
-      description: >-
-        A safety shield resource that can be used to check content.
-    ListShieldsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
     type: object
+      required:
+      - identifier
+      - provider_id
+      title: Shield
+      description: A safety shield resource that can be used to check content.
+    ListShieldsResponse:
     properties:
       data:
-          type: array
         items:
           $ref: '#/components/schemas/Shield'
-      additionalProperties: false
+          type: array
+          title: Data
+    type: object
     required:
-        - data
+      - data
     title: ListShieldsResponse
   InvokeToolRequest:
-      type: object
     properties:
       tool_name:
         type: string
-          description: The name of the tool to invoke.
+          title: Tool Name
       kwargs:
+          additionalProperties: true
         type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool.
+          title: Kwargs
       authorization:
-          type: string
-          description: >-
-            (Optional) OAuth access token for authenticating with the MCP server.
-      additionalProperties: false
+          anyOf:
+          - type: string
+          - type: 'null'
+    type: object
     required:
-        - tool_name
-        - kwargs
+      - tool_name
+      - kwargs
     title: InvokeToolRequest
   ImageContentItem:
-      type: object
+      description: A image content item
     properties:
       type:
-          type: string
         const: image
         default: image
-          description: >-
-            Discriminator type of the content item. Always "image"
+          title: Type
+          type: string
       image:
-          type: object
-          properties:
-            url:
-              $ref: '#/components/schemas/URL'
-              description: >-
-                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-                Note that URL could have length limits.
-            data:
-              type: string
-              contentEncoding: base64
-              description: base64 encoded image data as string
-          additionalProperties: false
-          description: >-
-            Image as a base64 encoded string or an URL
-      additionalProperties: false
+          $ref: '#/components/schemas/_URLOrData'
     required:
-        - type
-        - image
+      - image
     title: ImageContentItem
-      description: A image content item
+    type: object
   InterleavedContent:
-      oneOf:
-        - type: string
-        - $ref: '#/components/schemas/InterleavedContentItem'
-        - type: array
-          items:
-            $ref: '#/components/schemas/InterleavedContentItem'
-    InterleavedContentItem:
-      oneOf:
+      anyOf:
+      - type: string
+      - discriminator:
+          mapping:
+            image: '#/components/schemas/ImageContentItem'
+            text: '#/components/schemas/TextContentItem'
+          propertyName: type
+        oneOf:
         - $ref: '#/components/schemas/ImageContentItem'
+          title: ImageContentItem
         - $ref: '#/components/schemas/TextContentItem'
+          title: TextContentItem
+        title: ImageContentItem | TextContentItem
+      - items:
+          discriminator:
+            mapping:
+              image: '#/components/schemas/ImageContentItem'
+              text: '#/components/schemas/TextContentItem'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/ImageContentItem'
+            title: ImageContentItem
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          title: ImageContentItem | TextContentItem
+        type: array
+        title: list[ImageContentItem | TextContentItem]
+      title: string | list[ImageContentItem | TextContentItem]
+    InterleavedContentItem:
     discriminator:
-        propertyName: type
       mapping:
         image: '#/components/schemas/ImageContentItem'
         text: '#/components/schemas/TextContentItem'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/ImageContentItem'
+        title: ImageContentItem
+      - $ref: '#/components/schemas/TextContentItem'
+        title: TextContentItem
+      title: ImageContentItem | TextContentItem
   TextContentItem:
-      type: object
     properties:
       type:
         type: string
         const: text
+          title: Type
         default: text
-          description: >-
-            Discriminator type of the content item. Always "text"
       text:
         type: string
-          description: Text content
-      additionalProperties: false
+          title: Text
+    type: object
     required:
-        - type
-        - text
+      - text
     title: TextContentItem
     description: A text content item
   ToolInvocationResult:
-      type: object
     properties:
       content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The output content from the tool execution
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Output'
+                title: ImageContentItem-Output
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Output'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Output | TextContentItem
+            type: array
+            title: list[ImageContentItem-Output | TextContentItem]
+          - type: 'null'
+          title: string | list[ImageContentItem-Output | TextContentItem]
       error_message:
-          type: string
-          description: >-
-            (Optional) Error message if the tool execution failed
+          anyOf:
+          - type: string
+          - type: 'null'
       error_code:
-          type: integer
-          description: >-
-            (Optional) Numeric error code if the tool execution failed
+          anyOf:
+          - type: integer
+          - type: 'null'
       metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool execution
-      additionalProperties: false
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+    type: object
     title: ToolInvocationResult
     description: Result of a tool invocation.
   URL:
-      type: object
     properties:
       uri:
         type: string
-          description: The URL string pointing to the resource
-      additionalProperties: false
+          title: Uri
+    type: object
     required:
-        - uri
+      - uri
     title: URL
     description: A URL reference to external content.
   ToolDef:
-      type: object
     properties:
       toolgroup_id:
-          type: string
-          description: >-
-            (Optional) ID of the tool group this tool belongs to
+          anyOf:
+          - type: string
+          - type: 'null'
       name:
         type: string
-          description: Name of the tool
+          title: Name
       description:
-          type: string
-          description: >-
-            (Optional) Human-readable description of what the tool does
+          anyOf:
+          - type: string
+          - type: 'null'
       input_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool inputs (MCP inputSchema)
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       output_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool outputs (MCP outputSchema)
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool
-      additionalProperties: false
-      required:
-        - name
-      title: ToolDef
-      description: >-
-        Tool definition used in runtime contexts.
-    ListToolDefsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
     type: object
+      required:
+      - name
+      title: ToolDef
+      description: Tool definition used in runtime contexts.
+    ListToolDefsResponse:
     properties:
       data:
-          type: array
         items:
           $ref: '#/components/schemas/ToolDef'
-          description: List of tool definitions
-      additionalProperties: false
-      required:
-        - data
-      title: ListToolDefsResponse
-      description: >-
-        Response containing a list of tool definitions.
-    ToolGroup:
+          type: array
+          title: Data
     type: object
+      required:
+      - data
+      title: ListToolDefsResponse
+      description: Response containing a list of tool definitions.
+    ToolGroup:
     properties:
       identifier:
         type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
       provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
       provider_id:
         type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
       type:
         type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
         const: tool_group
+          title: Type
         default: tool_group
-          description: Type of resource, always 'tool_group'
       mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            (Optional) Model Context Protocol endpoint for remote tools
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          title: URL
       args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional arguments for the tool group
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-      title: ToolGroup
-      description: >-
-        A group of related tools managed together.
-    ListToolGroupsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
     type: object
+      required:
+      - identifier
+      - provider_id
+      title: ToolGroup
+      description: A group of related tools managed together.
+    ListToolGroupsResponse:
     properties:
       data:
-          type: array
         items:
           $ref: '#/components/schemas/ToolGroup'
-          description: List of tool groups
-      additionalProperties: false
-      required:
-        - data
-      title: ListToolGroupsResponse
-      description: >-
-        Response containing a list of tool groups.
-    Chunk:
+          type: array
+          title: Data
     type: object
+      required:
+      - data
+      title: ListToolGroupsResponse
+      description: Response containing a list of tool groups.
+    Chunk:
+      description: A chunk of content that can be inserted into a vector database.
     properties:
       content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the chunk, which can be interleaved text, images, or other
-            types.
-        chunk_id:
-          type: string
-          description: >-
-            Unique identifier for the chunk. Must be provided explicitly.
-        metadata:
-          type: object
-          additionalProperties:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Metadata associated with the chunk that will be used in the model context
-            during inference.
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+        chunk_id:
+          title: Chunk Id
+          type: string
+        metadata:
+          additionalProperties: true
+          title: Metadata
+          type: object
       embedding:
-          type: array
-          items:
-            type: number
-          description: >-
-            Optional embedding for the chunk. If not provided, it will be computed
-            later.
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+          nullable: true
       chunk_metadata:
-          $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Metadata for the chunk that will NOT be used in the context during inference.
-            The `chunk_metadata` is required backend functionality.
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          nullable: true
+          title: ChunkMetadata
     required:
-        - content
-        - chunk_id
-        - metadata
+      - content
+      - chunk_id
     title: Chunk
-      description: >-
-        A chunk of content that can be inserted into a vector database.
-    ChunkMetadata:
     type: object
+    ChunkMetadata:
     properties:
       chunk_id:
-          type: string
-          description: >-
-            The ID of the chunk. If not set, it will be generated based on the document
-            ID and content.
+          anyOf:
+          - type: string
+          - type: 'null'
       document_id:
-          type: string
-          description: >-
-            The ID of the document this chunk belongs to.
+          anyOf:
+          - type: string
+          - type: 'null'
       source:
-          type: string
-          description: >-
-            The source of the content, such as a URL, file path, or other identifier.
+          anyOf:
+          - type: string
+          - type: 'null'
       created_timestamp:
-          type: integer
-          description: >-
-            An optional timestamp indicating when the chunk was created.
+          anyOf:
+          - type: integer
+          - type: 'null'
       updated_timestamp:
-          type: integer
-          description: >-
-            An optional timestamp indicating when the chunk was last updated.
+          anyOf:
+          - type: integer
+          - type: 'null'
       chunk_window:
-          type: string
-          description: >-
-            The window of the chunk, which can be used to group related chunks together.
+          anyOf:
+          - type: string
+          - type: 'null'
       chunk_tokenizer:
-          type: string
-          description: >-
-            The tokenizer used to create the chunk. Default is Tiktoken.
+          anyOf:
+          - type: string
+          - type: 'null'
       chunk_embedding_model:
-          type: string
-          description: >-
-            The embedding model used to create the chunk's embedding.
+          anyOf:
+          - type: string
+          - type: 'null'
       chunk_embedding_dimension:
-          type: integer
-          description: >-
-            The dimension of the embedding vector for the chunk.
+          anyOf:
+          - type: integer
+          - type: 'null'
       content_token_count:
-          type: integer
-          description: >-
-            The number of tokens in the content of the chunk.
+          anyOf:
+          - type: integer
+          - type: 'null'
       metadata_token_count:
-          type: integer
-          description: >-
-            The number of tokens in the metadata of the chunk.
-      additionalProperties: false
-      title: ChunkMetadata
-      description: >-
-        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional
-        information about the chunk that will not be used in the context during
-        inference, but is required for backend functionality. The `ChunkMetadata` is
-        set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not
-        expected to change after. Use `Chunk.metadata` for metadata that will
-        be used in the context during inference.
-    InsertChunksRequest:
+          anyOf:
+          - type: integer
+          - type: 'null'
     type: object
+      title: ChunkMetadata
+      description: |-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+        will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+        is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after.
+        Use `Chunk.metadata` for metadata that will be used in the context during inference.
+    InsertChunksRequest:
     properties:
       vector_store_id:
         type: string
-          description: >-
-            The identifier of the vector database to insert the chunks into.
+          title: Vector Store Id
       chunks:
-          type: array
         items:
-            $ref: '#/components/schemas/Chunk'
-          description: >-
-            The chunks to insert. Each `Chunk` should contain content which can be
-            interleaved text, images, or other types. `metadata`: `dict[str, Any]`
-            and `embedding`: `List[float]` are optional. If `metadata` is provided,
-            you configure how Llama Stack formats the chunk during generation. If
-            `embedding` is not provided, it will be computed later.
+            $ref: '#/components/schemas/Chunk-Input'
+          type: array
+          title: Chunks
       ttl_seconds:
-          type: integer
-          description: The time to live of the chunks.
+          anyOf:
+          - type: integer
+          - type: 'null'
+    type: object
     required:
-        - vector_store_id
-        - chunks
+      - vector_store_id
+      - chunks
     title: InsertChunksRequest
   QueryChunksRequest:
-      type: object
     properties:
       vector_store_id:
         type: string
-          description: >-
-            The identifier of the vector database to query.
+          title: Vector Store Id
       query:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The query to search for.
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Input'
+              title: ImageContentItem-Input
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Input'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Input | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Input'
+                title: ImageContentItem-Input
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Input'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Input | TextContentItem
+            type: array
+            title: list[ImageContentItem-Input | TextContentItem]
+          title: string | list[ImageContentItem-Input | TextContentItem]
       params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the query.
-      additionalProperties: false
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+    type: object
     required:
-        - vector_store_id
-        - query
+      - vector_store_id
+      - query
     title: QueryChunksRequest
   QueryChunksResponse:
-      type: object
     properties:
       chunks:
-          type: array
         items:
-            $ref: '#/components/schemas/Chunk'
-          description: >-
-            List of content chunks returned from the query
-        scores:
+            $ref: '#/components/schemas/Chunk-Output'
         type: array
+          title: Chunks
+        scores:
         items:
           type: number
-          description: >-
-            Relevance scores corresponding to each returned chunk
-      additionalProperties: false
-      required:
-        - chunks
-        - scores
-      title: QueryChunksResponse
-      description: >-
-        Response from querying chunks in a vector database.
-    VectorStoreFileCounts:
+          type: array
+          title: Scores
     type: object
+      required:
+      - chunks
+      - scores
+      title: QueryChunksResponse
+      description: Response from querying chunks in a vector database.
+    VectorStoreFileCounts:
     properties:
       completed:
         type: integer
-          description: >-
-            Number of files that have been successfully processed
+          title: Completed
       cancelled:
         type: integer
-          description: >-
-            Number of files that had their processing cancelled
+          title: Cancelled
       failed:
         type: integer
-          description: Number of files that failed to process
+          title: Failed
       in_progress:
         type: integer
-          description: >-
-            Number of files currently being processed
+          title: In Progress
       total:
         type: integer
-          description: >-
-            Total number of files in the vector store
-      additionalProperties: false
-      required:
-        - completed
-        - cancelled
-        - failed
-        - in_progress
-        - total
-      title: VectorStoreFileCounts
-      description: >-
-        File processing status counts for a vector store.
-    VectorStoreListResponse:
+          title: Total
     type: object
+      required:
+      - completed
+      - cancelled
+      - failed
+      - in_progress
+      - total
+      title: VectorStoreFileCounts
+      description: File processing status counts for a vector store.
+    VectorStoreListResponse:
     properties:
       object:
         type: string
+          title: Object
         default: list
-          description: Object type identifier, always "list"
       data:
-          type: array
         items:
           $ref: '#/components/schemas/VectorStoreObject'
-          description: List of vector store objects
+          type: array
+          title: Data
       first_id:
-          type: string
-          description: >-
-            (Optional) ID of the first vector store in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
       last_id:
-          type: string
-          description: >-
-            (Optional) ID of the last vector store in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
       has_more:
         type: boolean
+          title: Has More
         default: false
-          description: >-
-            Whether there are more vector stores available beyond this page
-      additionalProperties: false
+    type: object
     required:
-        - object
-        - data
-        - has_more
+      - data
     title: VectorStoreListResponse
     description: Response from listing vector stores.
   VectorStoreObject:
-      type: object
     properties:
       id:
         type: string
-          description: Unique identifier for the vector store
+          title: Id
       object:
         type: string
+          title: Object
         default: vector_store
-          description: >-
-            Object type identifier, always "vector_store"
       created_at:
         type: integer
-          description: >-
-            Timestamp when the vector store was created
+          title: Created At
       name:
-          type: string
-          description: (Optional) Name of the vector store
+          anyOf:
+          - type: string
+          - type: 'null'
       usage_bytes:
         type: integer
+          title: Usage Bytes
         default: 0
-          description: >-
-            Storage space used by the vector store in bytes
       file_counts:
         $ref: '#/components/schemas/VectorStoreFileCounts'
-          description: >-
-            File processing status counts for the vector store
       status:
         type: string
+          title: Status
         default: completed
-          description: Current status of the vector store
       expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Expiration policy for the vector store
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       expires_at:
-          type: integer
-          description: >-
-            (Optional) Timestamp when the vector store will expire
+          anyOf:
+          - type: integer
+          - type: 'null'
       last_active_at:
-          type: integer
-          description: >-
-            (Optional) Timestamp of last activity on the vector store
+          anyOf:
+          - type: integer
+          - type: 'null'
       metadata:
+          additionalProperties: true
         type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of key-value pairs that can be attached to the vector store
-      additionalProperties: false
+          title: Metadata
+    type: object
     required:
-        - id
-        - object
-        - created_at
-        - usage_bytes
-        - file_counts
-        - status
-        - metadata
+      - id
+      - created_at
+      - file_counts
     title: VectorStoreObject
     description: OpenAI Vector Store object.
   VectorStoreChunkingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
     discriminator:
-        propertyName: type
       mapping:
         auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
         static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        title: VectorStoreChunkingStrategyAuto
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        title: VectorStoreChunkingStrategyStatic
+      title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
   VectorStoreChunkingStrategyAuto:
-      type: object
     properties:
       type:
         type: string
         const: auto
+          title: Type
         default: auto
-          description: >-
-            Strategy type, always "auto" for automatic chunking
-      additionalProperties: false
-      required:
-        - type
-      title: VectorStoreChunkingStrategyAuto
-      description: >-
-        Automatic chunking strategy for vector store files.
-    VectorStoreChunkingStrategyStatic:
     type: object
+      title: VectorStoreChunkingStrategyAuto
+      description: Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
     properties:
       type:
         type: string
         const: static
+          title: Type
         default: static
-          description: >-
-            Strategy type, always "static" for static chunking
       static:
         $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
-          description: >-
-            Configuration parameters for the static chunking strategy
-      additionalProperties: false
-      required:
-        - type
-        - static
-      title: VectorStoreChunkingStrategyStatic
-      description: >-
-        Static chunking strategy with configurable parameters.
-    VectorStoreChunkingStrategyStaticConfig:
     type: object
+      required:
+      - static
+      title: VectorStoreChunkingStrategyStatic
+      description: Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
     properties:
       chunk_overlap_tokens:
         type: integer
+          title: Chunk Overlap Tokens
         default: 400
-          description: >-
-            Number of tokens to overlap between adjacent chunks
       max_chunk_size_tokens:
         type: integer
+          maximum: 4096.0
+          minimum: 100.0
+          title: Max Chunk Size Tokens
         default: 800
-          description: >-
-            Maximum number of tokens per chunk, must be between 100 and 4096
-      additionalProperties: false
-      required:
-        - chunk_overlap_tokens
-        - max_chunk_size_tokens
+    type: object
     title: VectorStoreChunkingStrategyStaticConfig
-      description: >-
-        Configuration for static chunking strategy.
-    "OpenAICreateVectorStoreRequestWithExtraBody":
-      type: object
+      description: Configuration for static chunking strategy.
+ OpenAICreateVectorStoreRequestWithExtraBody: properties: name: - type: string - description: (Optional) A name for the vector store + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store + anyOf: + - items: + type: string + type: array + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) Strategy for splitting files into chunks + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: properties: name: - type: string - description: The name of the vector store. + anyOf: + - type: string + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object title: OpenaiUpdateVectorStoreRequest VectorStoreDeleteResponse: - type: object properties: id: type: string - description: >- - Unique identifier of the deleted vector store + title: Id object: type: string + title: Object default: vector_store.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: VectorStoreDeleteResponse description: Response from deleting a vector store. 
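Because the create request now declares `additionalProperties: true`, provider-specific extra_body keys can ride alongside the declared fields, all of which are nullable. A sketch, where the `embedding_model` key is an assumption shown only to illustrate the pass-through:

```python
create_request = {
    "name": "docs-store",
    "file_ids": ["file_abc", "file_def"],
    "chunking_strategy": {"type": "auto"},
    "metadata": {"team": "ml"},
    # extra_body pass-through; this key name is hypothetical
    "embedding_model": "all-MiniLM-L6-v2",
}
```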
- "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: properties: file_ids: - type: array items: type: string - description: >- - A list of File IDs that the vector store should use + type: array + title: File Ids attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. 
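A minimal file-batch request per the schema above; only `file_ids` is required:

```python
batch_request = {
    "file_ids": ["file_abc", "file_def"],      # required
    "attributes": {"source": "handbook"},      # optional, nullable
    "chunking_strategy": {"type": "auto"},     # optional; defaults to auto
}
```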
VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + title: Attributes chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. 
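Note that `status` and `last_error.code` are now inline enums rather than `oneOf` string consts. For illustration, a failed file object (error message invented):

```python
failed_file = {
    "id": "file_abc",
    "object": "vector_store.file",
    "attributes": {},
    "chunking_strategy": {"type": "auto"},
    "created_at": 1730000000,
    "status": "failed",                  # completed | in_progress | cancelled | failed
    "last_error": {
        "code": "server_error",          # or "rate_limit_exceeded"
        "message": "embedding provider timed out",
    },
    "usage_bytes": 0,
    "vector_store_id": "vs_123",
}
```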
VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. 
- additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. - bool: - type: boolean - VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content + title: Text embedding: - type: array - items: - type: number - description: >- - Optional embedding vector for this content chunk + anyOf: + - items: + type: number + type: array + - type: 'null' chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: Optional chunk metadata + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Optional user-defined metadata - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. 
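A sketch of a `VectorStoreContent` item as it would appear in file-content or search responses; only `type` and `text` are required, and the text is invented:

```python
content_item = {
    "type": "text",              # "text" is the only supported content type
    "text": "Llama Stack exposes an OpenAI-compatible vector store API.",
    "embedding": None,           # optional per-chunk embedding vector
    "chunk_metadata": None,      # optional ChunkMetadata
    "metadata": {"page": 3},     # optional user-defined metadata
}
```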
+ VectorStoreFileContentResponse: properties: object: type: string const: vector_store.file_content.page + title: Object default: vector_store.file_content.page - description: >- - The object type, which is always `vector_store.file_content.page` data: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: Parsed content of the file + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Indicates if there are more content pages to fetch next_page: - type: string - description: The token for the next page, if any - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreFileContentResponse - description: >- - Represents the parsed content of a vector store file. - OpenaiSearchVectorStoreRequest: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. + OpenaiSearchVectorStoreRequest: properties: query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: + anyOf: + - type: string + - items: type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
+ type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) + anyOf: + - type: boolean + - type: 'null' + default: false search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false + anyOf: + - type: string + - type: 'null' + default: vector + type: object required: - - query + - query title: OpenaiSearchVectorStoreRequest VectorStoreSearchResponse: - type: object properties: file_id: type: string - description: >- - Unique identifier of the file containing the result + title: File Id filename: type: string - description: Name of the file containing the result + title: Filename score: type: number - description: Relevance score for this search result + title: Score attributes: - type: object - additionalProperties: - oneOf: + anyOf: + - additionalProperties: + anyOf: - type: string - type: number - type: boolean - description: >- - (Optional) Key-value attributes associated with the file + title: string | number | boolean + type: object + - type: 'null' content: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false + type: array + title: Content + type: object required: - - file_id - - filename - - score - - content + - file_id + - filename + - score + - content title: VectorStoreSearchResponse description: Response from searching a vector store. VectorStoreSearchResponsePage: - type: object properties: object: type: string + title: Object default: vector_store.search_results.page - description: >- - Object type identifier for the search results page search_query: - type: array items: type: string - description: >- - The original search query that was executed - data: type: array + title: Search Query + data: items: $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - VersionInfo: + anyOf: + - type: string + - type: 'null' type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: properties: version: type: string - description: Version number of the service - additionalProperties: false + title: Version + type: object required: - - version + - version title: VersionInfo description: Version information for the service. 
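With the defaults (`max_num_results: 10`, `search_mode: vector`, `rewrite_query: false`) now spelled out in the schema, a search request can be as small as `{"query": "..."}`. A fuller sketch with illustrative values:

```python
search_request = {
    "query": "how do I deploy with the operator?",  # string or list of strings
    "filters": {"team": "ml"},                      # attribute filters, nullable
    "max_num_results": 10,                          # schema default
    "ranking_options": {"score_threshold": 0.2},    # SearchRankingOptions
    "rewrite_query": False,
    "search_mode": "vector",                        # "keyword", "vector" or "hybrid"
}
```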
+ PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. + Dataset: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + properties: + type: + type: string + const: rows + title: Type + default: rows + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + properties: + type: + type: string + const: uri + title: Type + default: uri + uri: + type: string + title: Uri + type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data + type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
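A minimal `Dataset` resource tying the pieces together: `purpose` must be one of the `DatasetPurpose` values and `source` is discriminated on `type` (the URI variant is used here; rows work the same way). Identifiers are illustrative:

```python
dataset = {
    "identifier": "my-eval-set",            # required
    "provider_id": "localfs",               # required; hypothetical provider
    "provider_resource_id": None,           # nullable
    "type": "dataset",                      # const
    "purpose": "eval/question-answer",      # see the DatasetPurpose enum
    "source": {"type": "uri",
               "uri": "https://example.com/qa.jsonl"},
    "metadata": {},
}
```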
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: SamplingParams + description: Sampling parameters. 
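A sketch of a `BenchmarkConfig` with a greedy-sampling `ModelCandidate`. The model id is invented; `scoring_params` maps scoring-function ids to their params and may be empty:

```python
benchmark_config = {
    "eval_candidate": {
        "type": "model",
        "model": "llama-3-8b",               # hypothetical model id
        "sampling_params": {
            "strategy": {"type": "greedy"},
            "max_tokens": 512,
        },
        "system_message": None,
    },
    "scoring_params": {},                    # scoring-fn id -> params
    "num_examples": 10,                      # evaluate a subset while testing
}
```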
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. 
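The three sampling strategies side by side; note `temperature` is required for top-p, while `top_k` must be at least 1:

```python
greedy = {"type": "greedy"}
top_p = {"type": "top_p", "temperature": 0.7, "top_p": 0.95}  # temperature required
top_k = {"type": "top_k", "top_k": 40}                         # minimum 1
```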
+ Checkpoint: + properties: + identifier: + type: string + title: Identifier + created_at: + type: string + format: date-time + title: Created At + epoch: + type: integer + title: Epoch + post_training_job_id: + type: string + title: Post Training Job Id + path: + type: string + title: Path + training_metrics: + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + properties: + job_uuid: + type: string + title: Job Uuid + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + properties: + epoch: + type: integer + title: Epoch + train_loss: + type: number + title: Train Loss + validation_loss: + type: number + title: Validation Loss + perplexity: + type: number + title: Perplexity + type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + PostTrainingJobStatusResponse: + properties: + job_uuid: + type: string + title: Job Uuid + status: + $ref: '#/components/schemas/JobStatus' + scheduled_at: + anyOf: + - type: string + format: date-time + - type: 'null' + started_at: + anyOf: + - type: string + format: date-time + - type: 'null' + completed_at: + anyOf: + - type: string + format: date-time + - type: 'null' + resources_allocated: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + - status + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + properties: + data: + items: + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object + required: + - data + title: ListPostTrainingJobsResponse + DPOAlignmentConfig: + properties: + beta: + type: number + title: Beta + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + type: object + required: + - beta + title: DPOAlignmentConfig + description: Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + properties: + dataset_id: + type: string + title: Dataset Id + batch_size: + type: integer + title: Batch Size + shuffle: + type: boolean + title: Shuffle + data_format: + $ref: '#/components/schemas/DatasetFormat' + validation_dataset_id: + anyOf: + - type: string + - type: 'null' + packed: + anyOf: + - type: boolean + - type: 'null' + default: false + train_on_input: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. 
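A minimal `DataConfig` under the schema above (dataset id invented); the first four fields are required and the rest fall back to their defaults:

```python
data_config = {
    "dataset_id": "my-train-set",    # required
    "batch_size": 8,                 # required
    "shuffle": True,                 # required
    "data_format": "instruct",       # "instruct" or "dialog"
    "validation_dataset_id": None,
    "packed": False,
    "train_on_input": False,
}
```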
+ EfficiencyConfig: + properties: + enable_activation_checkpointing: + anyOf: + - type: boolean + - type: 'null' + default: false + enable_activation_offloading: + anyOf: + - type: boolean + - type: 'null' + default: false + memory_efficient_fsdp_wrap: + anyOf: + - type: boolean + - type: 'null' + default: false + fsdp_cpu_offload: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + lr: + type: number + title: Lr + weight_decay: + type: number + title: Weight Decay + num_warmup_steps: + type: integer + title: Num Warmup Steps + type: object + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: Available optimizer algorithms for training. + TrainingConfig: + properties: + n_epochs: + type: integer + title: N Epochs + max_steps_per_epoch: + type: integer + title: Max Steps Per Epoch + default: 1 + gradient_accumulation_steps: + type: integer + title: Gradient Accumulation Steps + default: 1 + max_validation_steps: + anyOf: + - type: integer + - type: 'null' + default: 1 + data_config: + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig + optimizer_config: + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig + efficiency_config: + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig + dtype: + anyOf: + - type: string + - type: 'null' + default: bf16 + type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PostTrainingJob: + properties: + job_uuid: + type: string + title: Job Uuid + type: object + required: + - job_uuid + title: PostTrainingJob + AlgorithmConfig: + discriminator: + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig + LoraFinetuningConfig: + properties: + type: + type: string + const: LoRA + title: Type + default: LoRA + lora_attn_modules: + items: + type: string + type: array + title: Lora Attn Modules + apply_lora_to_mlp: + type: boolean + title: Apply Lora To Mlp + apply_lora_to_output: + type: boolean + title: Apply Lora To Output + rank: + type: integer + title: Rank + alpha: + type: integer + title: Alpha + use_dora: + anyOf: + - type: boolean + - type: 'null' + default: false + quantize_base: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. 
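Putting the fine-tuning pieces together: a LoRA algorithm config plus a `TrainingConfig` whose only required field is `n_epochs`. The attention-module names are illustrative:

```python
lora = {
    "type": "LoRA",
    "lora_attn_modules": ["q_proj", "v_proj"],   # hypothetical module names
    "apply_lora_to_mlp": False,
    "apply_lora_to_output": False,
    "rank": 8,
    "alpha": 16,
}

training_config = {
    "n_epochs": 1,                               # the only required field
    "optimizer_config": {
        "optimizer_type": "adamw",
        "lr": 1e-4,
        "weight_decay": 0.01,
        "num_warmup_steps": 100,
    },
    "dtype": "bf16",                             # schema default
}
```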
+ QATFinetuningConfig: + properties: + type: + type: string + const: QAT + title: Type + default: QAT + quantizer_name: + type: string + title: Quantizer Name + group_size: + type: integer + title: Group Size + type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. 
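The two MCP filters in practice (tool names invented): `ApprovalFilter` sorts tools into always/never-approve buckets, while `AllowedToolsFilter` whitelists names outright:

```python
approval_filter = {
    "always": ["delete_file"],    # hypothetical tool that always needs approval
    "never": ["read_file"],       # hypothetical tool that never does
}
allowed_tools = {"tool_names": ["read_file", "search_docs"]}
```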
+ BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
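A sketch of a `Chunk` ready for insertion into a vector database; `content` accepts a plain string or a list of content items, and only `content` and `chunk_id` are required:

```python
chunk = {
    "content": "Deploy the operator before creating a distribution.",
    "chunk_id": "chunk-0001",                  # required
    "metadata": {"document_id": "doc-42"},     # free-form metadata
    "embedding": None,                         # optional precomputed vector
    "chunk_metadata": None,
}
```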
+ Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + ConversationItemInclude: + type: string + enum: + - web_search_call.action.sources + - code_interpreter_call.outputs + - computer_call_output.output.image_url + - file_search_call.results + - message.input_image.image_url + - message.output_text.logprobs + - reasoning.encrypted_content + title: ConversationItemInclude + description: Specify additional output data to include in the model response. + DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. + Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. 
+ MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. 
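An assistant message under the schema above, with `tool_calls` left null (message content invented):

```python
assistant_message = {
    "role": "assistant",   # const, defaulted
    "content": "The operator reconciles LlamaStackDistribution resources.",
    "name": None,
    "tool_calls": None,    # populated when the model invokes tools
}
```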
+ OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
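A sketch of an `OpenAIResponseMessage` carrying input content, plus a structured-output `OpenAIResponseTextFormat`. The `text` field on the `input_text` part follows the usual OpenAI Responses shape and is assumed here, since `OpenAIResponseInputMessageContentText` is defined elsewhere in the spec:

```python
response_message = {
    "type": "message",
    "role": "user",
    "content": [                     # input-content branch of the union
        {"type": "input_text",       # assumed OpenAI Responses shape
         "text": "Summarize the deployment guide."},
    ],
    "id": None,
    "status": None,
}

text_format = {
    "type": "json_schema",
    "name": "answer",
    "schema": {"type": "object"},    # any JSON Schema object
    "strict": True,
}
```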
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
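And a user chat-completion message mixing text and image parts; the `image_url` part shape follows the standard OpenAI convention and is assumed here, since its schema is defined elsewhere in the spec:

```python
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url",                       # assumed OpenAI shape
         "image_url": {"url": "https://example.com/diagram.png"}},
    ],
    "name": None,
}
```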
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. + properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + SpanEndPayload: + description: Payload for a span end event. + properties: + type: + const: span_end + default: span_end + title: Type + type: string + status: + $ref: '#/components/schemas/SpanStatus' + required: + - status + title: SpanEndPayload + type: object + SpanStartPayload: + description: Payload for a span start event. 
+ properties: + type: + const: span_start + default: span_start + title: Type + type: string + name: + title: Name + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - name + title: SpanStartPayload + type: object + SpanStatus: + description: The status of a span indicating whether it completed successfully or with an error. + enum: + - ok + - error + title: SpanStatus + type: string + StructuredLogPayload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + LogSeverity: + description: The severity level of a log message. + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + type: string + MetricEvent: + description: A metric event containing a measured value. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: metric + default: metric + title: Type + type: string + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + title: Unit + type: string + required: + - trace_id + - span_id + - timestamp + - metric + - value + - unit + title: MetricEvent + type: object + StructuredLogEvent: + description: A structured log event containing typed payload data. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: structured_log + default: structured_log + title: Type + type: string + payload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + required: + - trace_id + - span_id + - timestamp + - payload + title: StructuredLogEvent + type: object + UnstructuredLogEvent: + description: An unstructured log event containing a simple text message. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... 
(4 variants) + type: object + - type: 'null' + type: + const: unstructured_log + default: unstructured_log + title: Type + type: string + message: + title: Message + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + required: + - trace_id + - span_id + - timestamp + - message + - severity + title: UnstructuredLogEvent + type: object + Event: + discriminator: + mapping: + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + propertyName: type + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + title: UnstructuredLogEvent + - $ref: '#/components/schemas/MetricEvent' + title: MetricEvent + - $ref: '#/components/schemas/StructuredLogEvent' + title: StructuredLogEvent + title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. 
+ properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. 
+ properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. 
+ properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. 
+ enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. 
+ properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object + Span: + description: A span representing a single operation within a trace. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + required: + - span_id + - trace_id + - name + - start_time + title: Span + type: object + Trace: + description: A trace representing the complete execution path of a request across multiple operations. + properties: + trace_id: + title: Trace Id + type: string + root_span_id: + title: Root Span Id + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + required: + - trace_id + - root_span_id + - start_time + title: Trace + type: object + EventType: + description: The type of telemetry event being logged. + enum: + - unstructured_log + - structured_log + - metric + title: EventType + type: string + StructuredLogType: + description: The type of structured log event payload. + enum: + - span_start + - span_end + title: StructuredLogType + type: string + EvalTrace: + description: A trace record for evaluation purposes. + properties: + session_id: + title: Session Id + type: string + step: + title: Step + type: string + input: + title: Input + type: string + output: + title: Output + type: string + expected_output: + title: Expected Output + type: string + required: + - session_id + - step + - input + - output + - expected_output + title: EvalTrace + type: object + SpanWithStatus: + description: A span that includes status information. 
+ properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/SpanStatus' + title: SpanStatus + - type: 'null' + nullable: true + title: SpanStatus + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + type: object + QueryConditionOp: + description: Comparison operators for query conditions. + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + type: string + QueryCondition: + description: A condition for filtering query results. + properties: + key: + title: Key + type: string + op: + $ref: '#/components/schemas/QueryConditionOp' + value: + title: Value + required: + - key + - op + - value + title: QueryCondition + type: object + MetricLabel: + description: A label associated with a metric. + properties: + name: + title: Name + type: string + value: + title: Value + type: string + required: + - name + - value + title: MetricLabel + type: object + MetricDataPoint: + description: A single data point in a metric time series. + properties: + timestamp: + title: Timestamp + type: integer + value: + title: Value + type: number + unit: + title: Unit + type: string + required: + - timestamp + - value + - unit + title: MetricDataPoint + type: object + MetricSeries: + description: A time series of metric data points. + properties: + metric: + title: Metric + type: string + labels: + items: + $ref: '#/components/schemas/MetricLabel' + title: Labels + type: array + values: + items: + $ref: '#/components/schemas/MetricDataPoint' + title: Values + type: array + required: + - metric + - labels + - values + title: MetricSeries + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -9626,8 +12115,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -9635,11 +12123,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -9647,185 +12133,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. +- description: APIs for creating and interacting with agentic systems. 
+ name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - ## Responses API + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - The Responses API provides OpenAI-compatible functionality with enhanced capabilities - for dynamic, stateful interactions. - - - > **✅ STABLE**: This API is production-ready with backward compatibility guarantees. - Recommended for production applications. - - - ### ✅ Supported Tools - - - The Responses API supports the following tool types: - - - - **`web_search`**: Search the web for current information and real-time data - - - **`file_search`**: Search through uploaded files and vector stores - - Supports dynamic `vector_store_ids` per call - - Compatible with OpenAI file search patterns - - **`function`**: Call custom functions with JSON schema validation - - - **`mcp_tool`**: Model Context Protocol integration - - - ### ✅ Supported Fields & Features - - - **Core Capabilities:** - - - **Dynamic Configuration**: Switch models, vector stores, and tools per request - without pre-configuration - - - **Conversation Branching**: Use `previous_response_id` to branch conversations - and explore different paths - - - **Rich Annotations**: Automatic file citations, URL citations, and container - file citations - - - **Status Tracking**: Monitor tool call execution status and handle failures - gracefully - - - ### 🚧 Work in Progress - - - - Full real-time response streaming support - - - `tool_choice` parameter - - - `max_tool_calls` parameter - - - Built-in tools (code interpreter, containers API) - - - Safety & guardrails - - - `reasoning` capabilities - - - `service_tier` - - - `logprobs` - - - `max_output_tokens` - - - `metadata` handling - - - `instructions` - - - `incomplete_details` - - - `background` - x-displayName: Agents - - name: Batches - description: >- - The API is designed to allow use of openai client libraries for seamless integration. - - - This API provides the following extensions: - - idempotent batch creation - - Note: This API is currently under active development and may undergo changes. - x-displayName: >- - The Batches API enables efficient processing of multiple requests in a single - operation, particularly useful for processing large datasets, batch evaluation - workflows, and cost-effective inference at scale. - - name: Conversations - description: >- - Protocol for conversation management operations. 
- x-displayName: Conversations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. - - - This API provides the raw interface to the underlying models. Three kinds of - models are supported: - - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - - - Rerank models: these models reorder the documents based on their relevance - to a query. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Batches - - Conversations - - Files - - Inference - - Inspect - - Models - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index d0813de4d..ff86e30e1 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1,19 +1,18 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Stable & Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Stable & Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **🔗 COMBINED**: This specification includes both stable production-ready APIs - and experimental pre-release APIs. Use stable APIs for production deployments - and experimental APIs for testing new features. + **🔗 COMBINED**: This specification includes both stable production-ready APIs + and experimental pre-release APIs. Use stable APIs for production deployments + and experimental APIs for testing new features. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1/batches: get: @@ -26,34 +25,37 @@ paths: $ref: '#/components/schemas/ListBatchesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: List all batches for the current user. + - Batches + summary: List Batches description: List all batches for the current user. + operationId: list_batches_v1_batches_get parameters: - - name: after - in: query - description: >- - A cursor for pagination; returns batches after this batch ID. - required: false - schema: - type: string - - name: limit - in: query - description: >- - Number of batches to return (default 20, max 100). 
- required: true - schema: - type: integer - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit post: responses: '200': @@ -64,28 +66,27 @@ paths: $ref: '#/components/schemas/Batch' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Batches - summary: >- - Create a new batch for processing multiple API requests. - description: >- - Create a new batch for processing multiple API requests. - parameters: [] + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests. + operationId: create_batch_v1_batches_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateBatchRequest' - required: true - deprecated: false /v1/batches/{batch_id}: get: responses: @@ -96,29 +97,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: >- - Retrieve information about a specific batch. - description: >- - Retrieve information about a specific batch. + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the batch to retrieve. - required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/batches/{batch_id}/cancel: post: responses: @@ -129,27 +130,29 @@ paths: schema: $ref: '#/components/schemas/Batch' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Batches - summary: Cancel a batch that is in progress. + - Batches + summary: Cancel Batch description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the batch to cancel. 
- required: true - schema: - type: string - deprecated: false + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -161,48 +164,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -210,35 +221,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. - description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. 
+ operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -249,30 +261,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. - required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -283,31 +297,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -318,30 +332,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. - description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. 
- parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -352,30 +367,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -385,36 +402,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -424,30 +443,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. Delete a conversation with the given ID. 
+ operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -459,73 +480,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. + operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: false - schema: - type: array + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: - type: string - enum: - - web_search_call.action.sources - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - title: ConversationItemInclude - description: >- - Specify additional output data to include in the model response. - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). 
- required: false - schema: - type: string - enum: - - asc - - desc - deprecated: false + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -536,35 +552,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -573,38 +591,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -614,365 +634,352 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. 
- description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. 
- required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. - parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. 
+ operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. 
content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. - parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. + operationId: list_routes_v1_inspect_routes_get parameters: - - name: api_filter - in: query - description: >- - Optional filter to control which routes are returned. Can be an API level - ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, - or 'deprecated' to show deprecated routes across all levels. If not specified, - returns all non-deprecated routes. - required: false - schema: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated type: string - enum: - - v1 - - v1alpha - - v1beta - - deprecated - deprecated: false + - type: 'null' + title: Api Filter /v1/models: get: responses: @@ -983,21 +990,22 @@ paths: schema: $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List models using the OpenAI API. + - Models + summary: Openai List Models description: List models using the OpenAI API. 
- parameters: [] - deprecated: false + operationId: openai_list_models_v1_models_get post: responses: '200': @@ -1007,23 +1015,25 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Register model. - description: >- + - Models + summary: Register Model + description: |- Register model. Register a model. - parameters: [] + operationId: register_model_v1_models_post requestBody: content: application/json: @@ -1041,59 +1051,63 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Get model. - description: >- + - Models + summary: Get Model + description: |- Get model. Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: model_id - in: path - description: The identifier of the model to get. - required: true - schema: - type: string - deprecated: false + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Models - summary: Unregister model. - description: >- + - Models + summary: Unregister Model + description: |- Unregister model. Unregister a model. + operationId: unregister_model_v1_models__model_id__delete parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. 
- required: true - schema: - type: string + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' deprecated: true /v1/moderations: post: @@ -1105,56 +1119,57 @@ paths: schema: $ref: '#/components/schemas/ModerationObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Create moderation. - description: >- + - Safety + summary: Run Moderation + description: |- Create moderation. Classifies if text and/or image inputs are potentially harmful. - parameters: [] + operationId: run_moderation_v1_moderations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunModerationRequest' required: true - deprecated: false /v1/prompts: get: responses: '200': - description: >- - A ListPromptsResponse containing all prompts. + description: A ListPromptsResponse containing all prompts. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List all prompts. + - Prompts + summary: List Prompts description: List all prompts. - parameters: [] - deprecated: false + operationId: list_prompts_v1_prompts_get post: responses: '200': @@ -1164,30 +1179,31 @@ paths: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Create prompt. - description: >- + - Prompts + summary: Create Prompt + description: |- Create prompt. Create a new prompt. 
- parameters: [] + operationId: create_prompt_v1_prompts_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreatePromptRequest' required: true - deprecated: false /v1/prompts/{prompt_id}: get: responses: @@ -1199,246 +1215,254 @@ paths: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Get prompt. - description: >- + - Prompts + summary: Get Prompt + description: |- Get prompt. Get a prompt by its identifier and optional version. + operationId: get_prompt_v1_prompts__prompt_id__get parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to get. - required: true - schema: - type: string - - name: version - in: query - description: >- - The version of the prompt to get (defaults to latest). - required: false - schema: - type: integer - deprecated: false + - name: version + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Version + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' post: responses: '200': - description: >- - The updated Prompt resource with incremented version. + description: The updated Prompt resource with incremented version. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Update prompt. - description: >- + - Prompts + summary: Update Prompt + description: |- Update prompt. Update an existing prompt (increments version). + operationId: update_prompt_v1_prompts__prompt_id__post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to update. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/UpdatePromptRequest' - required: true - deprecated: false delete: responses: - '200': - description: OK '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response + '204': + description: Successful Response tags: - - Prompts - summary: Delete prompt. 
- description: >- + - Prompts + summary: Delete Prompt + description: |- Delete prompt. Delete a prompt. + operationId: delete_prompt_v1_prompts__prompt_id__delete parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to delete. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/prompts/{prompt_id}/set-default-version: post: responses: '200': - description: >- - The prompt with the specified version now set as default. + description: The prompt with the specified version now set as default. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Set prompt version. - description: >- + - Prompts + summary: Set Default Version + description: |- Set prompt version. Set which version of a prompt should be the default in get_prompt (latest). + operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/SetDefaultVersionRequest' required: true - deprecated: false /v1/prompts/{prompt_id}/versions: get: responses: '200': - description: >- - A ListPromptsResponse containing all versions of the prompt. + description: A ListPromptsResponse containing all versions of the prompt. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List prompt versions. - description: >- + - Prompts + summary: List Prompt Versions + description: |- List prompt versions. List all versions of a specific prompt. + operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get parameters: - - name: prompt_id - in: path - description: >- - The identifier of the prompt to list versions for. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/providers: get: responses: '200': - description: >- - A ListProvidersResponse containing information about all providers. + description: A ListProvidersResponse containing information about all providers. 
content: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: List providers. - description: >- + - Providers + summary: List Providers + description: |- List providers. List all available providers. - parameters: [] - deprecated: false + operationId: list_providers_v1_providers_get /v1/providers/{provider_id}: get: responses: '200': - description: >- - A ProviderInfo object containing the provider's details. + description: A ProviderInfo object containing the provider's details. content: application/json: schema: $ref: '#/components/schemas/ProviderInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: Get provider. - description: >- + - Providers + summary: Inspect Provider + description: |- Get provider. Get detailed information about a specific provider. + operationId: inspect_provider_v1_providers__provider_id__get parameters: - - name: provider_id - in: path - description: The ID of the provider to inspect. - required: true - schema: - type: string - deprecated: false + - name: provider_id + in: path + required: true + schema: + type: string + description: 'Path parameter: provider_id' /v1/responses: get: responses: @@ -1450,45 +1474,56 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List all responses. + - Agents + summary: List Openai Responses description: List all responses. + operationId: list_openai_responses_v1_responses_get parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). 
- required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 50 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -1502,38 +1537,51 @@ paths: $ref: '#/components/schemas/OpenAIResponseObjectStream' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: Create a model response. + - Agents + summary: Create Openai Response description: Create a model response. - parameters: [] + operationId: create_openai_response_v1_responses_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateOpenaiResponseRequest' - required: true - deprecated: false - x-llama-stack-extra-body-params: - - name: guardrails - schema: - type: array - items: - oneOf: + x-llama-stack-extra-body-params: + guardrails: + $defs: + ResponseGuardrailSpec: + description: |- + Specification for a guardrail to apply during response generation. + + :param type: The type/identifier of the guardrail. + properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + anyOf: + - items: + anyOf: - type: string - $ref: '#/components/schemas/ResponseGuardrailSpec' - description: >- - List of guardrails to apply during response generation. Guardrails provide - safety and content moderation. - required: false + type: array + - type: 'null' + description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation. /v1/responses/{response_id}: get: responses: @@ -1544,28 +1592,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Get a model response. + - Agents + summary: Get Openai Response description: Get a model response. + operationId: get_openai_response_v1_responses__response_id__get parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. 
- required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' delete: responses: '200': @@ -1575,27 +1624,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIDeleteResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Delete a response. + - Agents + summary: Delete Openai Response description: Delete a response. + operationId: delete_openai_response_v1_responses__response_id__delete parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' /v1/responses/{response_id}/input_items: get: responses: @@ -1607,65 +1658,72 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseInputItem' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List input items. + - Agents + summary: List Openai Response Input Items description: List input items. + operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. - required: false - schema: - type: array + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. 
Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - type: 'null' + title: Include /v1/safety/run-shield: post: responses: @@ -1676,30 +1734,31 @@ paths: schema: $ref: '#/components/schemas/RunShieldResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Run shield. - description: >- + - Safety + summary: Run Shield + description: |- Run shield. Run a shield. - parameters: [] + operationId: run_shield_v1_safety_run_shield_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunShieldRequest' required: true - deprecated: false /v1/scoring-functions: get: responses: @@ -1710,45 +1769,48 @@ paths: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: List all scoring functions. + - Scoring Functions + summary: List Scoring Functions description: List all scoring functions. - parameters: [] - deprecated: false + operationId: list_scoring_functions_v1_scoring_functions_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Register a scoring function. + - Scoring Functions + summary: Register Scoring Function description: Register a scoring function. 
- parameters: [] + operationId: register_scoring_function_v1_scoring_functions_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' + $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: @@ -1761,86 +1823,90 @@ paths: schema: $ref: '#/components/schemas/ScoringFn' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Get a scoring function by its ID. + - Scoring Functions + summary: Get Scoring Function description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get parameters: - - name: scoring_fn_id - in: path - description: The ID of the scoring function to get. - required: true - schema: - type: string - deprecated: false + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Unregister a scoring function. + - Scoring Functions + summary: Unregister Scoring Function description: Unregister a scoring function. + operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' deprecated: true /v1/scoring/score: post: responses: '200': - description: >- - A ScoreResponse object containing rows and aggregated results. + description: A ScoreResponse object containing rows and aggregated results. content: application/json: schema: $ref: '#/components/schemas/ScoreResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a list of rows. + - Scoring + summary: Score description: Score a list of rows. 
- parameters: [] + operationId: score_v1_scoring_score_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreRequest' required: true - deprecated: false /v1/scoring/score-batch: post: responses: @@ -1851,27 +1917,28 @@ paths: schema: $ref: '#/components/schemas/ScoreBatchResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a batch of rows. + - Scoring + summary: Score Batch description: Score a batch of rows. - parameters: [] + operationId: score_batch_v1_scoring_score_batch_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreBatchRequest' required: true - deprecated: false /v1/shields: get: responses: @@ -1882,21 +1949,22 @@ paths: schema: $ref: '#/components/schemas/ListShieldsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: List all shields. + - Shields + summary: List Shields description: List all shields. - parameters: [] - deprecated: false + operationId: list_shields_v1_shields_get post: responses: '200': @@ -1906,20 +1974,22 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Register a shield. + - Shields + summary: Register Shield description: Register a shield. - parameters: [] + operationId: register_shield_v1_shields_post requestBody: content: application/json: @@ -1937,53 +2007,57 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Get a shield by its identifier. + - Shields + summary: Get Shield description: Get a shield by its identifier. + operationId: get_shield_v1_shields__identifier__get parameters: - - name: identifier - in: path - description: The identifier of the shield to get. 
- required: true - schema: - type: string - deprecated: false + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Shields - summary: Unregister a shield. + - Shields + summary: Unregister Shield description: Unregister a shield. + operationId: unregister_shield_v1_shields__identifier__delete parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' deprecated: true /v1/tool-runtime/invoke: post: @@ -1995,27 +2069,28 @@ paths: schema: $ref: '#/components/schemas/ToolInvocationResult' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolRuntime - summary: Run a tool with the given arguments. + - Tool Runtime + summary: Invoke Tool description: Run a tool with the given arguments. - parameters: [] + operationId: invoke_tool_v1_tool_runtime_invoke_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true - deprecated: false /v1/tool-runtime/list-tools: get: responses: @@ -2027,41 +2102,46 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolRuntime - summary: List all tools in the runtime. + - Tool Runtime + summary: List Runtime Tools description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get parameters: - - name: tool_group_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - - name: mcp_endpoint - in: query - description: >- - The MCP endpoint to use for the tool group. - required: false - schema: - $ref: '#/components/schemas/URL' - - name: authorization - in: query - description: >- - (Optional) OAuth access token for authenticating with the MCP server. 
- required: false - schema: - type: string - deprecated: false + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint /v1/toolgroups: get: responses: @@ -2072,40 +2152,43 @@ paths: schema: $ref: '#/components/schemas/ListToolGroupsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: List tool groups with optional provider. + - Tool Groups + summary: List Tool Groups description: List tool groups with optional provider. - parameters: [] - deprecated: false + operationId: list_tool_groups_v1_toolgroups_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Register a tool group. + - Tool Groups + summary: Register Tool Group description: Register a tool group. - parameters: [] + operationId: register_tool_group_v1_toolgroups_post requestBody: content: application/json: @@ -2123,52 +2206,57 @@ paths: schema: $ref: '#/components/schemas/ToolGroup' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool group by its ID. + - Tool Groups + summary: Get Tool Group description: Get a tool group by its ID. + operationId: get_tool_group_v1_toolgroups__toolgroup_id__get parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to get. 
- required: true - schema: - type: string - deprecated: false + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Unregister a tool group. + - Tool Groups + summary: Unregister Toolgroup description: Unregister a tool group. + operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' deprecated: true /v1/tools: get: @@ -2181,27 +2269,30 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolGroups - summary: List tools with optional tool group. + - Tool Groups + summary: List Tools description: List tools with optional tool group. + operationId: list_tools_v1_tools_get parameters: - - name: toolgroup_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - deprecated: false + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id /v1/tools/{tool_name}: get: responses: @@ -2212,54 +2303,57 @@ paths: schema: $ref: '#/components/schemas/ToolDef' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool by its name. + - Tool Groups + summary: Get Tool description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get parameters: - - name: tool_name - in: path - description: The name of the tool to get. 
- required: true - schema: - type: string - deprecated: false + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' /v1/vector-io/insert: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - VectorIO - summary: Insert chunks into a vector database. + - Vector Io + summary: Insert Chunks description: Insert chunks into a vector database. - parameters: [] + operationId: insert_chunks_v1_vector_io_insert_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InsertChunksRequest' required: true - deprecated: false /v1/vector-io/query: post: responses: @@ -2270,815 +2364,829 @@ paths: schema: $ref: '#/components/schemas/QueryChunksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Query chunks from a vector database. + - Vector Io + summary: Query Chunks description: Query chunks from a vector database. - parameters: [] + operationId: query_chunks_v1_vector_io_query_post requestBody: content: application/json: schema: $ref: '#/components/schemas/QueryChunksRequest' required: true - deprecated: false /v1/vector_stores: get: responses: '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. + description: A VectorStoreListResponse containing the list of vector stores. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Returns a list of vector stores. + - Vector Io + summary: Openai List Vector Stores description: Returns a list of vector stores. + operationId: openai_list_vector_stores_v1_vector_stores_get parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order post: responses: '200': - description: >- - A VectorStoreObject representing the created vector store. + description: A VectorStoreObject representing the created vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Creates a vector store. - description: >- + - Vector Io + summary: Openai Create Vector Store + description: |- Creates a vector store. Generate an OpenAI-compatible vector store with the given parameters. - parameters: [] + operationId: openai_create_vector_store_v1_vector_stores_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}: get: responses: '200': - description: >- - A VectorStoreObject representing the vector store. + description: A VectorStoreObject representing the vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store. + - Vector Io + summary: Openai Retrieve Vector Store description: Retrieves a vector store. + operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreObject representing the updated vector store. 
+ description: A VectorStoreObject representing the updated vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store. + - Vector Io + summary: Openai Update Vector Store description: Updates a vector store. + operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. + description: A VectorStoreDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store. + - Vector Io + summary: Openai Delete Vector Store description: Delete a vector store. + operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' /v1/vector_stores/{vector_store_id}/file_batches: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the created file batch. + description: A VectorStoreFileBatchObject representing the created file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Create a vector store file batch. - description: >- + - Vector Io + summary: Openai Create Vector Store File Batch + description: |- Create a vector store file batch. 
- Generate an OpenAI-compatible vector store file batch for the given vector - store. + Generate an OpenAI-compatible vector store file batch for the given vector store. + operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true - deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the file batch. + description: A VectorStoreFileBatchObject representing the file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieve a vector store file batch. + - Vector Io + summary: Openai Retrieve Vector Store File Batch description: Retrieve a vector store file batch. + operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the file batch to retrieve. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the cancelled file batch. + description: A VectorStoreFileBatchObject representing the cancelled file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Cancels a vector store file batch. + - Vector Io + summary: Openai Cancel Vector Store File Batch description: Cancels a vector store file batch. 
+ operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the file batch to cancel. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: get: responses: '200': - description: >- - A VectorStoreFilesListInBatchResponse containing the list of files in - the batch. + description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Returns a list of vector store files in a batch. - description: >- - Returns a list of vector store files in a batch. + - Vector Io + summary: Openai List Files In Vector Store File Batch + description: Returns a list of vector store files in a batch. + operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get parameters: - - name: batch_id - in: path - description: >- - The ID of the file batch to list files from. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - Filter by file status. One of in_progress, completed, failed, cancelled. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Filter + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/files: get: responses: '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. + description: A VectorStoreListFilesResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListFilesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: List files in a vector store. + - Vector Io + summary: Openai List Files In Vector Store description: List files in a vector store. + operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. 
- required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + title: Filter + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + nullable: true + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the attached file. + description: A VectorStoreFileObject representing the attached file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Attach a file to a vector store. + - Vector Io + summary: Openai Attach File To Vector Store description: Attach a file to a vector store. + operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: '200': - description: >- - A VectorStoreFileObject representing the file. + description: A VectorStoreFileObject representing the file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. 
- required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. 
- required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - File contents, optionally with embeddings and metadata based on query - parameters. + description: File contents, optionally with embeddings and metadata based on query parameters. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - - name: include_embeddings - in: query - description: >- - Whether to include embedding vectors in the response. - required: false - schema: - $ref: '#/components/schemas/bool' - - name: include_metadata - in: query - description: >- - Whether to include chunk metadata in the response. - required: false - schema: - $ref: '#/components/schemas/bool' - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. + Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. - parameters: [] - deprecated: false + operationId: version_v1_version_get /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. 
- required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -3090,55 +3198,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. - description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. - required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -3149,21 +3255,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. 
- parameters: [] - deprecated: false + operationId: list_datasets_v1beta_datasets_get post: responses: '200': @@ -3173,25 +3280,27 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Register a new dataset. + - Datasets + summary: Register Dataset description: Register a new dataset. - parameters: [] + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequest' + $ref: '#/components/schemas/RegisterDatasetRequestLoose' required: true deprecated: true /v1beta/datasets/{dataset_id}: @@ -3204,52 +3313,57 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. - required: true - schema: - type: string - deprecated: false + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Datasets - summary: Unregister a dataset by its ID. + - Datasets + summary: Unregister Dataset description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. 
- required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' deprecated: true /v1alpha/eval/benchmarks: get: @@ -3261,40 +3375,43 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. - parameters: [] - deprecated: false + operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Register a benchmark. + - Benchmarks + summary: Register Benchmark description: Register a benchmark. - parameters: [] + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: @@ -3312,131 +3429,136 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Unregister a benchmark. + - Benchmarks + summary: Unregister Benchmark description: Unregister a benchmark. 
+ operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. + - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. 
- required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: - $ref: '#/components/schemas/RunEvalRequest' + $ref: '#/components/schemas/BenchmarkConfig' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -3447,67 +3569,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. 
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -3518,68 +3642,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. 
+ operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -3591,54 +3714,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. - parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -3650,27 +3775,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. 
- required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -3681,21 +3807,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -3706,27 +3833,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. - parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -3737,1473 +3865,1277 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. 
properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - ListBatchesResponse: type: object + ListBatchesResponse: properties: object: type: string const: list + title: Object default: list data: - type: array items: - type: object - properties: - id: - type: string - completion_window: - type: string - created_at: - type: integer - endpoint: - type: string - input_file_id: - type: string - object: - type: string - const: batch - status: - type: string - enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled - cancelled_at: - type: integer - cancelling_at: - type: integer - completed_at: - type: integer - error_file_id: - type: string - errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false - title: Errors - expired_at: - type: integer - expires_at: - type: integer - failed_at: - type: integer - finalizing_at: - type: integer - in_progress_at: - type: integer - metadata: - type: object - additionalProperties: - type: string - model: - type: string - output_file_id: - type: string - request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total - title: BatchRequestCounts - usage: - type: object - properties: - input_tokens: - type: integer - input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens - title: BatchUsage - additionalProperties: false - required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status - title: Batch + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list has_more: type: boolean + title: Has More + description: Whether there are more batches available default: false - additionalProperties: false - required: - - object - - data - - has_more 
- title: ListBatchesResponse - description: >- - Response containing a list of batch objects. - CreateBatchRequest: type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. + CreateBatchRequest: properties: input_file_id: type: string - description: >- - The ID of an uploaded file containing requests for the batch. + title: Input File Id endpoint: type: string - description: >- - The endpoint to be used for all requests in the batch. + title: Endpoint completion_window: type: string const: 24h - description: >- - The time window within which the batch should be processed. + title: Completion Window metadata: - type: object - additionalProperties: - type: string - description: Optional metadata for the batch. + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' idempotency_key: - type: string - description: >- - Optional idempotency key. When provided, enables idempotent behavior. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input_file_id - - endpoint - - completion_window + - input_file_id + - endpoint + - completion_window title: CreateBatchRequest Batch: - type: object properties: id: type: string + title: Id completion_window: type: string + title: Completion Window created_at: type: integer + title: Created At endpoint: type: string + title: Endpoint input_file_id: type: string + title: Input File Id object: type: string const: batch + title: Object status: type: string enum: - - validating - - failed - - in_progress - - finalizing - - completed - - expired - - cancelling - - cancelled + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status cancelled_at: - type: integer + anyOf: + - type: integer + - type: 'null' cancelling_at: - type: integer + anyOf: + - type: integer + - type: 'null' completed_at: - type: integer + anyOf: + - type: integer + - type: 'null' error_file_id: - type: string + anyOf: + - type: string + - type: 'null' errors: - type: object - properties: - data: - type: array - items: - type: object - properties: - code: - type: string - line: - type: integer - message: - type: string - param: - type: string - additionalProperties: false - title: BatchError - object: - type: string - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' title: Errors expired_at: - type: integer + anyOf: + - type: integer + - type: 'null' expires_at: - type: integer + anyOf: + - type: integer + - type: 'null' failed_at: - type: integer + anyOf: + - type: integer + - type: 'null' finalizing_at: - type: integer + anyOf: + - type: integer + - type: 'null' in_progress_at: - type: integer + anyOf: + - type: integer + - type: 'null' metadata: - type: object - additionalProperties: - type: string + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' model: - type: string + anyOf: + - type: string + - type: 'null' output_file_id: - type: string + anyOf: + - type: string + - type: 'null' request_counts: - type: object - properties: - completed: - type: integer - failed: - type: integer - total: - type: integer - additionalProperties: false - required: - - completed - - failed - - total + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' title: BatchRequestCounts usage: - type: object - properties: - input_tokens: - type: integer - 
input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - additionalProperties: false - required: - - cached_tokens - title: InputTokensDetails - output_tokens: - type: integer - output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - additionalProperties: false - required: - - reasoning_tokens - title: OutputTokensDetails - total_tokens: - type: integer - additionalProperties: false - required: - - input_tokens - - input_tokens_details - - output_tokens - - output_tokens_details - - total_tokens + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' title: BatchUsage - additionalProperties: false + additionalProperties: true + type: object required: - - id - - completion_window - - created_at - - endpoint - - input_file_id - - object - - status + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status title: Batch Order: type: string enum: - - asc - - desc + - asc + - desc title: Order description: Sort order for paginated responses. ListOpenAIChatCompletionResponse: - type: object properties: data: - type: array items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more completions available beyond this list + title: Has More first_id: type: string - description: ID of the first completion in this list + title: First Id last_id: type: string - description: ID of the last completion in this list + title: Last Id object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. 
properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. 
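As an illustration of the content-part schemas above, a user message that mixes text and image parts looks like the following minimal sketch (field names follow OpenAIChatCompletionContentPartTextParam, OpenAIChatCompletionContentPartImageParam, and OpenAIImageURL; the URL and prompt text are placeholders):

```python
# Sketch: a user message whose content is a list of typed parts.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is shown in this image?"},
        {
            "type": "image_url",
            # OpenAIImageURL: "url" is required; "detail" defaults to "auto"
            "image_url": {"url": "https://example.com/cat.png", "detail": "low"},
        },
    ],
}
```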
+ OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. 
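The tool-call schemas above carry function invocations inside assistant messages. A hedged sketch of such a message follows; the call id and `get_weather` function are hypothetical, and note that per OpenAIChatCompletionToolCallFunction, `arguments` travels as a JSON-encoded string rather than a nested object:

```python
import json

# Sketch: an assistant message carrying one function tool call, following
# OpenAIChatCompletionToolCall / OpenAIChatCompletionToolCallFunction above.
# The call id and function name are hypothetical.
assistant_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "index": 0,
            "id": "call_abc123",
            "type": "function",
            "function": {
                "name": "get_weather",
                # arguments are a JSON string, not an object
                "arguments": json.dumps({"city": "Paris"}),
            },
        }
    ],
}
```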
+ OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. - OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... 
(5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. - OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. 
- OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. + OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). 
+ anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. - additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. 
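OpenAIJSONSchema above, together with the json_schema response format defined next, enables structured outputs. A minimal sketch of a `response_format` value; only `name` is required by OpenAIJSONSchema, and the schema shape here is illustrative:

```python
# Sketch: a structured-output response_format, per OpenAIJSONSchema and the
# json_schema response format variant. The schema itself is illustrative.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "city_info",
        "strict": True,  # optional per OpenAIJSONSchema
        "schema": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "population": {"type": "integer"},
            },
            "required": ["city", "population"],
        },
    },
}
```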
+ OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
- logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
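Putting OpenAIChatCompletionRequestWithExtraBody together: only `model` and `messages` are required, and extra body fields are tolerated (`additionalProperties: true`). A sketch using plain `requests`, assuming a Llama Stack server on localhost:8321 that exposes the OpenAI-compatible chat completions route and a registered model id; the route and model id are assumptions for illustration:

```python
import requests

# Sketch: a minimal OpenAIChatCompletionRequestWithExtraBody payload.
# Base URL, route, and model id are assumptions; only "model" and
# "messages" are required by the schema above.
resp = requests.post(
    "http://localhost:8321/v1/chat/completions",
    json={
        "model": "meta-llama/Llama-3.2-1B-Instruct",
        "messages": [
            {"role": "system", "content": "You are a concise assistant."},
            {"role": "user", "content": "Say hello."},
        ],
        "temperature": 0.2,
        "max_completion_tokens": 64,
    },
    timeout=60,
)
resp.raise_for_status()
completion = resp.json()  # shaped like OpenAIChatCompletion below
print(completion["choices"][0]["message"]["content"])
```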
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
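When `stream` is true, the server emits OpenAIChatCompletionChunk objects whose choices carry OpenAIChoiceDelta fragments. A sketch of consuming them, assuming the usual OpenAI-compatible server-sent-events framing (one `data: {...}` line per chunk, terminated by `data: [DONE]`); URL and model id are again placeholders:

```python
import json
import requests

# Sketch: accumulate streamed delta content from OpenAIChatCompletionChunk
# objects. SSE framing, URL, and model id are assumptions for illustration.
with requests.post(
    "http://localhost:8321/v1/chat/completions",
    json={
        "model": "meta-llama/Llama-3.2-1B-Instruct",
        "messages": [{"role": "user", "content": "Count to three."}],
        "stream": True,
    },
    stream=True,
    timeout=60,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload == b"[DONE]":
            break
        chunk = json.loads(payload)           # OpenAIChatCompletionChunk
        delta = chunk["choices"][0]["delta"]  # OpenAIChoiceDelta
        if delta.get("content"):
            print(delta["content"], end="", flush=True)
```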
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -5211,6704 +5143,8240 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
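For reference, a file_citation annotation as it would appear attached to response output text, per OpenAIResponseAnnotationFileCitation above; all values are illustrative:

```python
# Sketch: a file_citation annotation object. All four non-type fields are
# required by OpenAIResponseAnnotationFileCitation; values are illustrative.
annotation = {
    "type": "file_citation",
    "file_id": "file-abc123",
    "filename": "report.pdf",
    "index": 42,  # position of the citation within the content
}
```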
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' - OpenAIResponseInputMessageContentFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: properties: type: type: string const: input_file + title: Type default: input_file - description: >- - The type of the input item. Always `input_file`. file_data: - type: string - description: >- - The data of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' file_url: - type: string - description: >- - The URL of the file to be sent to the model. + anyOf: + - type: string + - type: 'null' filename: - type: string - description: >- - The name of the file to be sent to the model. - additionalProperties: false - required: - - type - title: OpenAIResponseInputMessageContentFile - description: >- - File content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" file_id: - type: string - description: >- - (Optional) The ID of the file to be sent to the model. 
+ anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. - OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
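`OpenAIResponseOutputMessageFunctionToolCall` keeps `call_id`, `name`, and `arguments` required, with `arguments` carried as a JSON-encoded string. An illustrative instance (names and values are placeholders):

```json
{
  "type": "function_call",
  "call_id": "call_abc123",
  "name": "get_weather",
  "arguments": "{\"city\": \"Tokyo\"}"
}
```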
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
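In `OpenAIResponseOutputMessageMCPListTools`, the previously inlined tool schema is now a `$ref` to `MCPListToolsTool`; assuming it keeps the removed inline shape (required `input_schema` and `name`, optional `description`), an illustrative instance:

```json
{
  "type": "mcp_list_tools",
  "id": "mcpl_abc123",
  "server_label": "docs-server",
  "tools": [
    {
      "name": "search_docs",
      "input_schema": {
        "type": "object",
        "properties": {"query": {"type": "string"}}
      }
    }
  ]
}
```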
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
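`ConversationDeletedResource` now requires only `id`; `object` and `deleted` fall back to the defaults shown. A fully populated instance (placeholder ID):

```json
{
  "id": "conv_abc123",
  "object": "conversation.deleted",
  "deleted": true
}
```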
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
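Note that `OpenAIEmbeddingData.embedding` accepts either a float list or a base64 string depending on `encoding_format`. An illustrative `OpenAIEmbeddingsResponse` (model name and values are placeholders):

```json
{
  "object": "list",
  "data": [
    {"object": "embedding", "embedding": [0.12, -0.03, 0.44], "index": 0}
  ],
  "model": "example-embedding-model",
  "usage": {"prompt_tokens": 5, "total_tokens": 5}
}
```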
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
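`OpenAIFileDeleteResponse` drops `object` from `required` (it defaults to `file`), leaving `id` and `deleted` mandatory. An illustrative instance:

```json
{
  "id": "file-abc123",
  "object": "file",
  "deleted": true
}
```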
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - OpenAIModel: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: properties: id: type: string + title: Id object: type: string const: model + title: Object default: model created: type: integer + title: Created owned_by: type: string + title: Owned By custom_metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: OpenAIListModelsResponse Model: - type: object properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding - - rerank + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - (Optional) The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. 
+ title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. 
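Only `version` and `prompt_id` are required on the regenerated `Prompt` resource. An illustrative instance; the ID is shortened for readability (the description calls for a 48-digit hash) and the `{{ text }}` placeholder syntax is an assumption, since the schema only names the variables:

```json
{
  "prompt_id": "pmpt_abc123",
  "prompt": "Summarize the following text: {{ text }}",
  "version": 1,
  "variables": ["text"],
  "is_default": true
}
```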
+ ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. - additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. 
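All five `ProviderInfo` fields remain required, so a `ListProvidersResponse` instance might look like the following (provider names and config values are illustrative):

```json
{
  "data": [
    {
      "api": "inference",
      "provider_id": "vllm",
      "provider_type": "remote::vllm",
      "config": {"url": "http://localhost:8000/v1"},
      "health": {"status": "OK"}
    }
  ]
}
```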
+ ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. - OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutput' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseMessage' - OpenAIResponseInputToolFileSearch: + title: Message type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false - required: - - type - - name - title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. - OpenAIResponseInputToolWebSearch: + anyOf: + - type: boolean + - type: 'null' type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
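The regenerated `OpenAIResponseInputToolFunction` adds `parameters` to `required` (it may still be `null`). An illustrative tool definition with placeholder names:

```json
{
  "type": "function",
  "name": "get_weather",
  "description": "Look up current weather for a city",
  "parameters": {
    "type": "object",
    "properties": {"city": {"type": "string"}},
    "required": ["city"]
  },
  "strict": true
}
```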
+ OpenAIResponseInputToolWebSearch: properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 - - type: string - const: web_search_2025_08_26 + title: Type default: web_search - description: Web search tool type variant to use - search_context_size: type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type - title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseObjectWithInput: type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response + anyOf: + - type: integer + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
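# A minimal sketch of a value satisfying the revised OpenAIResponseObjectWithInput
# schema above. Only the fields the new definition lists under `required` are set;
# the message shapes are illustrative assumptions (OpenAIResponseMessage is defined
# elsewhere in this spec, and the `message` discriminator value routes to it):
#
#   created_at: 1730000000
#   id: resp_abc123
#   model: my-model             # hypothetical model identifier
#   output:
#     - type: message           # -> OpenAIResponseMessage-Output per the mapping above
#       role: assistant
#       content: "Hello!"
#   status: completed
#   input:
#     - type: message           # -> OpenAIResponseMessage-Output per the mapping above
#       role: user
#       content: "Hi"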
OpenAIResponseOutput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) OpenAIResponsePrompt: - type: object properties: id: type: string - description: Unique identifier of the prompt template + title: Id variables: - type: object - additionalProperties: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - description: >- - Dictionary of variable names to OpenAIResponseInputMessageContent structure - for template substitution. The substitution values can either be strings, - or other Response input types like images or files. 
+ anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' version: - type: string - description: >- - Version number of the prompt to use (defaults to latest if not specified) - additionalProperties: false - required: - - id - title: OpenAIResponsePrompt - description: >- - OpenAI compatible Prompt object that is used in OpenAI responses. - OpenAIResponseText: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. + OpenAIResponseText: properties: format: - type: object - properties: - type: - oneOf: - - type: string - const: text - - type: string - const: json_schema - - type: string - const: json_object - description: >- - Must be "text", "json_schema", or "json_object" to identify the format - type - name: - type: string - description: >- - The name of the response format. Only used for json_schema. - schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. Only used for json_schema. - description: - type: string - description: >- - (Optional) A description of the response format. Only used for json_schema. - strict: - type: boolean - description: >- - (Optional) Whether to strictly enforce the JSON schema. If true, the - response must match the schema exactly. Only used for json_schema. - additionalProperties: false - required: - - type - description: >- - (Optional) Text format configuration specifying output format requirements - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object title: OpenAIResponseText - description: >- - Text response configuration for OpenAI responses. + description: Text response configuration for OpenAI responses. 
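# A small sketch of the two common OpenAIResponseText shapes under the new
# definition (format is now nullable and delegates to OpenAIResponseTextFormat,
# whose fields match the inline schema removed above); the json_schema payload
# below is an illustrative assumption:
#
#   format:
#     type: text
#
#   format:
#     type: json_schema
#     name: weather_report      # hypothetical schema name
#     schema:
#       type: object
#       properties:
#         temperature: { type: number }
#     strict: true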
OpenAIResponseTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) OpenAIResponseToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - title: OpenAIResponseToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response object. - OpenAIResponseUsage: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. 
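# A minimal sketch of OpenAIResponseToolMCP values accepted by the revised schema —
# allowed_tools may be either a plain list of tool names or an AllowedToolsFilter
# (whose tool_names field matches the inline schema removed above); the server
# label and tool names are illustrative assumptions:
#
#   type: mcp
#   server_label: docs-server
#   allowed_tools:
#     - search_docs
#
#   type: mcp
#   server_label: docs-server
#   allowed_tools:
#     tool_names:
#       - search_docs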
+ OpenAIResponseUsage: properties: input_tokens: type: integer - description: Number of tokens in the input + title: Input Tokens output_tokens: type: integer - description: Number of tokens in the output + title: Output Tokens total_tokens: type: integer - description: Total tokens used (input + output) + title: Total Tokens input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - description: Detailed breakdown of input token usage + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - description: Detailed breakdown of output token usage - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object required: - - input_tokens - - output_tokens - - total_tokens + - input_tokens + - output_tokens + - total_tokens title: OpenAIResponseUsage description: Usage information for OpenAI response. ResponseGuardrailSpec: - type: object + description: Specification for a guardrail to apply during response generation. properties: type: + title: Type type: string - description: The type/identifier of the guardrail. - additionalProperties: false required: - - type + - type title: ResponseGuardrailSpec - description: >- - Specification for a guardrail to apply during response generation. + type: object OpenAIResponseInputTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) OpenAIResponseInputToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label server_url: type: string - description: URL endpoint of the MCP server + title: Server Url headers: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) HTTP headers to include when connecting to the server + anyOf: + - additionalProperties: true + type: object + - type: 'null' authorization: - type: string - description: >- - (Optional) OAuth access token for authenticating with the MCP server + anyOf: + - type: string + - type: 'null' require_approval: - oneOf: - - type: string - const: always - - type: string - const: never - - type: object - properties: - always: - type: array - items: - type: string - description: >- - (Optional) List of tool names that always require approval - never: - type: array - items: - type: string - description: >- - (Optional) List of tool names that never require approval - additionalProperties: false - title: ApprovalFilter - description: >- - Filter configuration for MCP tool approval requirements. + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter default: never - description: >- - Approval requirement for tool calls ("always", "never", or filter) allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. - description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - - server_url - - require_approval - title: OpenAIResponseInputToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response inputs. - CreateOpenaiResponseRequest: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. + CreateOpenaiResponseRequest: properties: input: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: Input message(s) to create the response. 
+ anyOf: + - type: string + - items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input + type: array + title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] + title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] model: type: string - description: The underlying LLM used for completions. + title: Model prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Prompt object with ID, version, and variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt instructions: - type: string + anyOf: + - type: string + - type: 'null' previous_response_id: - type: string - description: >- - (Optional) if specified, the new response will be a continuation of the - previous response. This can be used to easily fork-off new responses from - existing responses. + anyOf: + - type: string + - type: 'null' conversation: - type: string - description: >- - (Optional) The ID of a conversation to add the response to. Must begin - with 'conv_'. Input and output messages will be automatically added to - the conversation. 
+ anyOf: + - type: string + - type: 'null' store: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: true stream: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: false temperature: - type: number + anyOf: + - type: number + - type: 'null' text: - $ref: '#/components/schemas/OpenAIResponseText' + anyOf: + - $ref: '#/components/schemas/OpenAIResponseText' + title: OpenAIResponseText + - type: 'null' + title: OpenAIResponseText tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputTool' + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' include: - type: array - items: - type: string - description: >- - (Optional) Additional fields to include in the response. + anyOf: + - items: + type: string + type: array + - type: 'null' max_infer_iters: - type: integer + anyOf: + - type: integer + - type: 'null' + default: 10 max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - input - - model + - input + - model title: CreateOpenaiResponseRequest OpenAIResponseObject: - type: object properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) 
+ oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output parallel_tool_calls: type: boolean + title: Parallel Tool Calls default: false - description: >- - Whether tool calls can be executed in parallel previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' prompt: - $ref: '#/components/schemas/OpenAIResponsePrompt' - description: >- - (Optional) Reference to a prompt template and its variables. + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. 
+ anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' max_tool_calls: - type: integer - description: >- - (Optional) Max number of total calls to built-in tools that can be processed - in a response - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - title: OpenAIResponseObject - description: >- - Complete OpenAI response object containing generation results and metadata. - OpenAIResponseContentPartOutputText: + anyOf: + - type: integer + - type: 'null' type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. 
properties: type: - type: string const: output_text default: output_text - description: >- - Content part type identifier, always "output_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Text emitted for this content part annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - description: >- - Structured annotations associated with the text + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array logprobs: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) Token log probability details - additionalProperties: false + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true required: - - type - - text - - annotations + - text title: OpenAIResponseContentPartOutputText - description: >- - Text content within a streamed response part. - "OpenAIResponseContentPartReasoningSummary": type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. properties: type: - type: string const: summary_text default: summary_text - description: >- - Content part type identifier, always "summary_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Summary text - additionalProperties: false required: - - type - - text - title: >- - OpenAIResponseContentPartReasoningSummary - description: >- - Reasoning summary part in a streamed response. - OpenAIResponseContentPartReasoningText: + - text + title: OpenAIResponseContentPartReasoningSummary type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. properties: type: - type: string const: reasoning_text default: reasoning_text - description: >- - Content part type identifier, always "reasoning_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Reasoning text supplied by the model - additionalProperties: false required: - - type - - text + - text title: OpenAIResponseContentPartReasoningText - description: >- - Reasoning text emitted as part of a streamed response. 
+ type: object OpenAIResponseObjectStream: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: - propertyName: type mapping: - response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - response.output_item.done: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.completed: 
'#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' - "OpenAIResponseObjectStreamResponseCompleted": - type: object + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: 
OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Completed response object type: - type: string const: response.completed default: response.completed - description: >- - Event type identifier, always "response.completed" - additionalProperties: false + title: Type + type: string required: - - response - - type - title: >- - OpenAIResponseObjectStreamResponseCompleted - description: >- - Streaming event indicating a response has been completed. - "OpenAIResponseObjectStreamResponseContentPartAdded": + - response + title: OpenAIResponseObjectStreamResponseCompleted type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the part within the content array response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this content item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item containing this content part output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response part: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' - - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' - refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' - description: The content part that was added + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.content_part.added default: response.content_part.added - description: >- - Event type identifier, always "response.content_part.added" - additionalProperties: false + title: Type + type: string required: - - content_index - - response_id - - item_id - - output_index - - part - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseContentPartAdded - description: >- - Streaming event for when a new content part is added to a response item. - "OpenAIResponseObjectStreamResponseContentPartDone": + - content_index + - response_id + - item_id + - output_index + - part + - sequence_number + title: OpenAIResponseObjectStreamResponseContentPartAdded type: object + OpenAIResponseObjectStreamResponseContentPartDone: + description: Streaming event for when a content part is completed. 
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.done
           default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
       type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
         type:
-          type: string
           const: response.created
           default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+      - response
+      title: OpenAIResponseObjectStreamResponseCreated
       type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.failed
           default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - sequence_number
-        - type
+      - response
+      - sequence_number
       title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.completed
           default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.in_progress
           default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.searching
           default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: >-
-            Incremental function call arguments being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the function call being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.delta
           default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
       properties:
         arguments:
+          title: Arguments
           type: string
-          description: >-
-            Final complete arguments JSON string for the function call
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed function call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.done
           default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - arguments
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the MCP call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.mcp_call.in_progress
           default: response.mcp_call.in_progress
-          description: >-
-            Event type identifier, always "response.mcp_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpCallInProgress
-      description: >-
-        Streaming event for MCP calls in progress.
-    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.completed
           default: response.mcp_list_tools.completed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsCompleted
-    "OpenAIResponseObjectStreamResponseMcpListToolsFailed":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.failed
           default: response.mcp_list_tools.failed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsFailed
-    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress:
      properties:
        sequence_number:
+          title: Sequence Number
          type: integer
        type:
-          type: string
          const: response.mcp_list_tools.in_progress
          default: response.mcp_list_tools.in_progress
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsInProgress
-    "OpenAIResponseObjectStreamResponseOutputItemAdded":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
      type: object
+    OpenAIResponseObjectStreamResponseOutputItemAdded:
+      description: Streaming event for when a new output item is added to the response.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The output item that was added (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.added
           default: response.output_item.added
-          description: >-
-            Event type identifier, always "response.output_item.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemAdded
-      description: >-
-        Streaming event for when a new output item is added to the response.
-    "OpenAIResponseObjectStreamResponseOutputItemDone":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputItemDone:
+      description: Streaming event for when an output item is completed.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The completed output item (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.done
           default: response.output_item.done
-          description: >-
-            Event type identifier, always "response.output_item.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemDone
-      description: >-
-        Streaming event for when an output item is completed.
-    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemDone
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded:
+      description: Streaming event for when an annotation is added to output text.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the item to which the annotation is being added
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response's output array
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the content part within the output item
         annotation_index:
+          title: Annotation Index
           type: integer
-          description: >-
-            Index of the annotation within the content part
         annotation:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
           discriminator:
-            propertyName: type
             mapping:
-              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
               container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
               file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
-          description: The annotation object being added
+              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+            title: OpenAIResponseAnnotationFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            title: OpenAIResponseAnnotationCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+            title: OpenAIResponseAnnotationContainerFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+            title: OpenAIResponseAnnotationFilePath
+          title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.annotation.added
           default: response.output_text.annotation.added
-          description: >-
-            Event type identifier, always "response.output_text.annotation.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - content_index
-        - annotation_index
-        - annotation
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
-      description: >-
-        Streaming event for when an annotation is added to output text.
-    "OpenAIResponseObjectStreamResponseOutputTextDelta":
+      - item_id
+      - output_index
+      - content_index
+      - annotation_index
+      - annotation
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDelta:
+      description: Streaming event for incremental text content updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         delta:
+          title: Delta
           type: string
-          description: Incremental text content being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.delta
           default: response.output_text.delta
-          description: >-
-            Event type identifier, always "response.output_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDelta
-      description: >-
-        Streaming event for incremental text content updates.
-    "OpenAIResponseObjectStreamResponseOutputTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDone:
+      description: Streaming event for when text output is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         text:
+          title: Text
           type: string
-          description: >-
-            Final complete text content of the output item
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.done
           default: response.output_text.done
-          description: >-
-            Event type identifier, always "response.output_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDone
-      description: >-
-        Streaming event for when text output is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded:
+      description: Streaming event for when a new reasoning summary part is added.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The summary part that was added
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.added
           default: response.reasoning_summary_part.added
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
-      description: >-
-        Streaming event for when a new reasoning summary part is added.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone:
+      description: Streaming event for when a reasoning summary part is completed.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The completed summary part
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.done
           default: response.reasoning_summary_part.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
-      description: >-
-        Streaming event for when a reasoning summary part is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta:
+      description: Streaming event for incremental reasoning summary text updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: Incremental summary text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.delta
           default: response.reasoning_summary_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
-      description: >-
-        Streaming event for incremental reasoning summary text updates.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone:
+      description: Streaming event for when reasoning summary text is completed.
       properties:
         text:
+          title: Text
           type: string
-          description: Final complete summary text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.done
           default: response.reasoning_summary_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
-      description: >-
-        Streaming event for when reasoning summary text is completed.
-    "OpenAIResponseObjectStreamResponseReasoningTextDelta":
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDelta:
+      description: Streaming event for incremental reasoning text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         delta:
+          title: Delta
           type: string
-          description: Incremental reasoning text being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.delta
           default: response.reasoning_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDelta
-      description: >-
-        Streaming event for incremental reasoning text updates.
-    "OpenAIResponseObjectStreamResponseReasoningTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         text:
+          title: Text
           type: string
-          description: Final complete reasoning text
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.done
           default: response.reasoning_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDone
-      description: >-
-        Streaming event for when reasoning text is completed.
-    "OpenAIResponseObjectStreamResponseRefusalDelta":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         delta:
+          title: Delta
           type: string
-          description: Incremental refusal text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.delta
           default: response.refusal.delta
-          description: >-
-            Event type identifier, always "response.refusal.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDelta
-      description: >-
-        Streaming event for incremental refusal text updates.
-    "OpenAIResponseObjectStreamResponseRefusalDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         refusal:
+          title: Refusal
           type: string
-          description: Final complete refusal text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.done
           default: response.refusal.done
-          description: >-
-            Event type identifier, always "response.refusal.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - refusal
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDone
-      description: >-
-        Streaming event for when refusal text is completed.
-    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+      - content_index
+      - refusal
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
       type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed web search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.web_search_call.completed
           default: response.web_search_call.completed
-          description: >-
-            Event type identifier, always "response.web_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseWebSearchCallCompleted
-      description: >-
-        Streaming event for completed web search calls.
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+    ListOpenAIResponseInputItem:
       properties:
         data:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseInput'
-          description: List of input items
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  message: '#/components/schemas/OpenAIResponseMessage-Output'
+                  web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseMessage-Output | ... (7 variants)
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Data
         object:
           type: string
           const: list
+          title: Object
           default: list
-          description: Object type identifier, always "list"
-      additionalProperties: false
-      required:
-        - data
-        - object
-      title: ListOpenAIResponseInputItem
-      description: >-
-        List container for OpenAI response input items.
-    RunShieldRequest:
       type: object
+      required:
+      - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldRequest:
       properties:
         shield_id:
           type: string
-          description: The identifier of the shield to run.
+          title: Shield Id
         messages:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIMessageParam'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
             oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
+            - $ref: '#/components/schemas/OpenAIUserMessageParam-Input'
+              title: OpenAIUserMessageParam-Input
+            - $ref: '#/components/schemas/OpenAISystemMessageParam'
+              title: OpenAISystemMessageParam
+            - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+              title: OpenAIAssistantMessageParam-Input
+            - $ref: '#/components/schemas/OpenAIToolMessageParam'
+              title: OpenAIToolMessageParam
+            - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+              title: OpenAIDeveloperMessageParam
+            discriminator:
+              propertyName: role
+              mapping:
+                assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+                developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+                system: '#/components/schemas/OpenAISystemMessageParam'
+                tool: '#/components/schemas/OpenAIToolMessageParam'
+                user: '#/components/schemas/OpenAIUserMessageParam-Input'
+            title: OpenAIUserMessageParam-Input | ... (5 variants)
+          type: array
+          title: Messages
+        params:
+          additionalProperties: true
+          type: object
+          title: Params
+      type: object
       required:
-        - shield_id
-        - messages
-        - params
+      - shield_id
+      - messages
+      - params
       title: RunShieldRequest
     RunShieldResponse:
-      type: object
       properties:
         violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/SafetyViolation'
+            title: SafetyViolation
+          - type: 'null'
+          title: SafetyViolation
+      type: object
       title: RunShieldResponse
       description: Response from running a safety shield.
     SafetyViolation:
-      type: object
       properties:
         violation_level:
           $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
         user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
+          anyOf:
+          - type: string
+          - type: 'null'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
+          title: Metadata
+      type: object
       required:
-        - violation_level
-        - metadata
+      - violation_level
       title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
+      description: Details of a safety violation detected by content moderation.
     ViolationLevel:
       type: string
       enum:
-        - info
-        - warn
-        - error
+      - info
+      - warn
+      - error
       title: ViolationLevel
       description: Severity level of a safety violation.
     AggregationFunctionType:
       type: string
       enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
+      - average
+      - weighted_average
+      - median
+      - categorical_count
+      - accuracy
       title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
+      description: Types of aggregation functions for scoring results.
     ArrayType:
-      type: object
       properties:
         type:
           type: string
           const: array
+          title: Type
           default: array
-          description: Discriminator type. Always "array"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: ArrayType
       description: Parameter type for array values.
     BasicScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: basic
+          title: Type
           default: basic
-          description: >-
-            The type of scoring function parameters, always basic
         aggregation_functions:
-          type: array
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - aggregation_functions
-      title: BasicScoringFnParams
-      description: >-
-        Parameters for basic scoring function configuration.
-    BooleanType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      title: BasicScoringFnParams
+      description: Parameters for basic scoring function configuration.
+    BooleanType:
       properties:
         type:
           type: string
           const: boolean
+          title: Type
           default: boolean
-          description: Discriminator type. Always "boolean"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: BooleanType
       description: Parameter type for boolean values.
     ChatCompletionInputType:
-      type: object
       properties:
         type:
           type: string
           const: chat_completion_input
+          title: Type
           default: chat_completion_input
-          description: >-
-            Discriminator type. Always "chat_completion_input"
-      additionalProperties: false
-      required:
-        - type
-      title: ChatCompletionInputType
-      description: >-
-        Parameter type for chat completion input.
-    CompletionInputType:
       type: object
+      title: ChatCompletionInputType
+      description: Parameter type for chat completion input.
+    CompletionInputType:
       properties:
         type:
           type: string
           const: completion_input
+          title: Type
           default: completion_input
-          description: >-
-            Discriminator type. Always "completion_input"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: CompletionInputType
       description: Parameter type for completion input.
     JsonType:
-      type: object
       properties:
         type:
           type: string
           const: json
+          title: Type
           default: json
-          description: Discriminator type. Always "json"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: JsonType
       description: Parameter type for JSON values.
     LLMAsJudgeScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: llm_as_judge
+          title: Type
           default: llm_as_judge
-          description: >-
-            The type of scoring function parameters, always llm_as_judge
         judge_model:
           type: string
-          description: >-
-            Identifier of the LLM model to use as a judge for scoring
+          title: Judge Model
         prompt_template:
-          type: string
-          description: >-
-            (Optional) Custom prompt template for the judge model
+          anyOf:
+          - type: string
+          - type: 'null'
         judge_score_regexes:
-          type: array
           items:
             type: string
-          description: >-
-            Regexes to extract the answer from generated response
-        aggregation_functions:
           type: array
+          title: Judge Score Regexes
+          description: Regexes to extract the answer from generated response
+        aggregation_functions:
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - judge_model
-        - judge_score_regexes
-        - aggregation_functions
-      title: LLMAsJudgeScoringFnParams
-      description: >-
-        Parameters for LLM-as-judge scoring function configuration.
-    NumberType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      required:
+      - judge_model
+      title: LLMAsJudgeScoringFnParams
+      description: Parameters for LLM-as-judge scoring function configuration.
+    NumberType:
       properties:
         type:
           type: string
           const: number
+          title: Type
           default: number
-          description: Discriminator type. Always "number"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: NumberType
       description: Parameter type for numeric values.
     ObjectType:
-      type: object
       properties:
         type:
           type: string
           const: object
+          title: Type
           default: object
-          description: Discriminator type. Always "object"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: ObjectType
       description: Parameter type for object values.
     RegexParserScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: regex_parser
+          title: Type
           default: regex_parser
-          description: >-
-            The type of scoring function parameters, always regex_parser
         parsing_regexes:
-          type: array
           items:
             type: string
-          description: >-
-            Regex to extract the answer from generated response
-        aggregation_functions:
           type: array
+          title: Parsing Regexes
+          description: Regex to extract the answer from generated response
+        aggregation_functions:
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - parsing_regexes
-        - aggregation_functions
-      title: RegexParserScoringFnParams
-      description: >-
-        Parameters for regex parser scoring function configuration.
-    ScoringFn:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      title: RegexParserScoringFnParams
+      description: Parameters for regex parser scoring function configuration.
+ ScoringFn: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: scoring_function + title: Type default: scoring_function - description: >- - The resource type, always scoring_function description: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata + description: Any additional metadata for this definition return_type: oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants)
+        description: The return type of the deterministic function
       discriminator:
         propertyName: type
         mapping:
-            string: '#/components/schemas/StringType'
-            number: '#/components/schemas/NumberType'
-            boolean: '#/components/schemas/BooleanType'
           array: '#/components/schemas/ArrayType'
-            object: '#/components/schemas/ObjectType'
-            json: '#/components/schemas/JsonType'
-            union: '#/components/schemas/UnionType'
+            boolean: '#/components/schemas/BooleanType'
           chat_completion_input: '#/components/schemas/ChatCompletionInputType'
           completion_input: '#/components/schemas/CompletionInputType'
+            json: '#/components/schemas/JsonType'
+            number: '#/components/schemas/NumberType'
+            object: '#/components/schemas/ObjectType'
+            string: '#/components/schemas/StringType'
+            union: '#/components/schemas/UnionType'
     params:
-        $ref: '#/components/schemas/ScoringFnParams'
-    additionalProperties: false
+        anyOf:
+          - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                title: LLMAsJudgeScoringFnParams
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                title: RegexParserScoringFnParams
+              - $ref: '#/components/schemas/BasicScoringFnParams'
+                title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+        title: Params
+        description: The parameters for the scoring function for benchmark eval; these can be overridden for app eval
+    type: object
     required:
-      - identifier
-      - provider_id
-      - type
-      - metadata
-      - return_type
+      - identifier
+      - provider_id
+      - return_type
     title: ScoringFn
-    description: >-
-      A scoring function resource for evaluating model outputs.
+    description: A scoring function resource for evaluating model outputs.
   ScoringFnParams:
-    oneOf:
-      - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
-      - $ref: '#/components/schemas/RegexParserScoringFnParams'
-      - $ref: '#/components/schemas/BasicScoringFnParams'
     discriminator:
-      propertyName: type
       mapping:
+        basic: '#/components/schemas/BasicScoringFnParams'
         llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
         regex_parser: '#/components/schemas/RegexParserScoringFnParams'
-        basic: '#/components/schemas/BasicScoringFnParams'
+      propertyName: type
+    oneOf:
+      - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        title: LLMAsJudgeScoringFnParams
+      - $ref: '#/components/schemas/RegexParserScoringFnParams'
+        title: RegexParserScoringFnParams
+      - $ref: '#/components/schemas/BasicScoringFnParams'
+        title: BasicScoringFnParams
+    title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
   ScoringFnParamsType:
-    type: string
+    description: Types of scoring function parameter configurations.
     enum:
-      - llm_as_judge
-      - regex_parser
-      - basic
+      - llm_as_judge
+      - regex_parser
+      - basic
     title: ScoringFnParamsType
-    description: >-
-      Types of scoring function parameter configurations.
+    type: string
   StringType:
-    type: object
     properties:
       type:
         type: string
         const: string
+        title: Type
         default: string
-        description: Discriminator type. Always "string"
-    additionalProperties: false
-    required:
-      - type
+    type: object
     title: StringType
     description: Parameter type for string values.
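+  # ScoringFnParams above is a tagged union keyed on "type"; a minimal sketch
+  # of the llm_as_judge variant (the judge model id is a placeholder):
+  #   type: llm_as_judge
+  #   judge_model: <judge-model-id>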
UnionType: - type: object properties: type: type: string const: union + title: Type default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type + type: object title: UnionType description: Parameter type for union values. ListScoringFunctionsResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/ScoringFn' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListScoringFunctionsResponse ScoreRequest: - type: object properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. + type: array + title: Input Rows scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + type: object required: - - input_rows - - scoring_functions + - input_rows + - scoring_functions title: ScoreRequest ScoreResponse: - type: object properties: results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false + type: object + title: Results + type: object required: - - results + - results title: ScoreResponse description: The response from scoring. ScoringResult: - type: object properties: score_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. + type: array + title: Score Rows aggregated_results: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false + title: Aggregated Results + type: object required: - - score_rows - - aggregated_results + - score_rows + - aggregated_results title: ScoringResult description: A scoring result for a single row. ScoreBatchRequest: - type: object properties: dataset_id: type: string - description: The ID of the dataset to score. + title: Dataset Id scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. 
+ anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions save_results_dataset: type: boolean - description: >- - Whether to save the results to a dataset. - additionalProperties: false + title: Save Results Dataset + default: false + type: object required: - - dataset_id - - scoring_functions - - save_results_dataset + - dataset_id + - scoring_functions title: ScoreBatchRequest ScoreBatchResponse: - type: object properties: dataset_id: - type: string - description: >- - (Optional) The identifier of the dataset that was scored + anyOf: + - type: string + - type: 'null' results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult - additionalProperties: false - required: - - results - title: ScoreBatchResponse - description: >- - Response from batch scoring operations on datasets. - Shield: + type: object + title: Results type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. + Shield: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: shield + title: Type default: shield - description: The resource type, always shield params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. - ListShieldsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: Shield + description: A safety shield resource that can be used to check content. + ListShieldsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Shield' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListShieldsResponse InvokeToolRequest: - type: object properties: tool_name: type: string - description: The name of the tool to invoke. 
+        title: Tool Name
      kwargs:
+        additionalProperties: true
        type: object
-        additionalProperties:
-          oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-        description: >-
-          A dictionary of arguments to pass to the tool.
+        title: Kwargs
      authorization:
-        type: string
-        description: >-
-          (Optional) OAuth access token for authenticating with the MCP server.
-      additionalProperties: false
+        anyOf:
+          - type: string
+          - type: 'null'
+    type: object
    required:
-      - tool_name
-      - kwargs
+      - tool_name
+      - kwargs
    title: InvokeToolRequest
  ImageContentItem:
-    type: object
+    description: An image content item
    properties:
      type:
-        type: string
        const: image
        default: image
-        description: >-
-          Discriminator type of the content item. Always "image"
+        title: Type
+        type: string
      image:
-        type: object
-        properties:
-          url:
-            $ref: '#/components/schemas/URL'
-            description: >-
-              A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-              Note that URL could have length limits.
-          data:
-            type: string
-            contentEncoding: base64
-            description: base64 encoded image data as string
-        additionalProperties: false
-        description: >-
-          Image as a base64 encoded string or an URL
-      additionalProperties: false
+        $ref: '#/components/schemas/_URLOrData'
    required:
-      - type
-      - image
+      - image
    title: ImageContentItem
-    description: A image content item
+    type: object
  InterleavedContent:
-    oneOf:
-      - type: string
-      - $ref: '#/components/schemas/InterleavedContentItem'
-      - type: array
-        items:
-          $ref: '#/components/schemas/InterleavedContentItem'
-  InterleavedContentItem:
-    oneOf:
+    anyOf:
+      - type: string
+      - discriminator:
+          mapping:
+            image: '#/components/schemas/ImageContentItem'
+            text: '#/components/schemas/TextContentItem'
+          propertyName: type
+          oneOf:
        - $ref: '#/components/schemas/ImageContentItem'
+            title: ImageContentItem
        - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          title: ImageContentItem | TextContentItem
+      - items:
+          discriminator:
+            mapping:
+              image: '#/components/schemas/ImageContentItem'
+              text: '#/components/schemas/TextContentItem'
+            propertyName: type
+            oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          type: array
+          title: list[ImageContentItem | TextContentItem]
+        title: string | list[ImageContentItem | TextContentItem]
+  InterleavedContentItem:
    discriminator:
-      propertyName: type
      mapping:
        image: '#/components/schemas/ImageContentItem'
        text: '#/components/schemas/TextContentItem'
+      propertyName: type
+    oneOf:
+      - $ref: '#/components/schemas/ImageContentItem'
+        title: ImageContentItem
+      - $ref: '#/components/schemas/TextContentItem'
+        title: TextContentItem
+    title: ImageContentItem | TextContentItem
  TextContentItem:
-    type: object
    properties:
      type:
        type: string
        const: text
+        title: Type
        default: text
-      description: >-
-        Discriminator type of the content item.
Always "text" text: type: string - description: Text content - additionalProperties: false + title: Text + type: object required: - - type - - text + - text title: TextContentItem description: A text content item ToolInvocationResult: - type: object properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed + anyOf: + - type: string + - type: 'null' error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed + anyOf: + - type: integer + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object title: ToolInvocationResult description: Result of a tool invocation. URL: - type: object properties: uri: type: string - description: The URL string pointing to the resource - additionalProperties: false + title: Uri + type: object required: - - uri + - uri title: URL description: A URL reference to external content. 
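+  # Sketch of a ToolInvocationResult using the plain-string branch of the
+  # content union above (the text and metadata values are illustrative):
+  #   content: "tool output"
+  #   metadata: {}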
ToolDef: - type: object properties: toolgroup_id: - type: string - description: >- - (Optional) ID of the tool group this tool belongs to + anyOf: + - type: string + - type: 'null' name: type: string - description: Name of the tool + title: Name description: - type: string - description: >- - (Optional) Human-readable description of what the tool does + anyOf: + - type: string + - type: 'null' input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool inputs (MCP inputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' output_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool outputs (MCP outputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - name - title: ToolDef - description: >- - Tool definition used in runtime contexts. - ListToolDefsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false - required: - - data - title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - ToolGroup: + type: array + title: Data type: object + required: + - data + title: ListToolDefsResponse + description: Response containing a list of tool definitions. + ToolGroup: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: tool_group + title: Type default: tool_group - description: Type of resource, always 'tool_group' mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - ListToolGroupsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. 
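+  # Sketch of a ToolDef as defined above; only "name" is required, and the
+  # JSON Schema shown for input_schema is an invented example:
+  #   name: get_weather
+  #   description: Look up current weather for a city
+  #   input_schema:
+  #     type: object
+  #     properties:
+  #       city: {type: string}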
+ ListToolGroupsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false - required: - - data - title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - Chunk: + type: array + title: Data type: object + required: + - data + title: ListToolGroupsResponse + description: Response containing a list of tool groups. + Chunk: + description: A chunk of content that can be inserted into a vector database. properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - chunk_id: - type: string - description: >- - Unique identifier for the chunk. Must be provided explicitly. - metadata: - type: object - additionalProperties: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + chunk_id: + title: Chunk Id + type: string + metadata: + additionalProperties: true + title: Metadata + type: object embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + nullable: true + title: ChunkMetadata required: - - content - - chunk_id - - metadata + - content + - chunk_id title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: type: object + ChunkMetadata: properties: chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. + anyOf: + - type: string + - type: 'null' document_id: - type: string - description: >- - The ID of the document this chunk belongs to. + anyOf: + - type: string + - type: 'null' source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. + anyOf: + - type: string + - type: 'null' created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. 
+ anyOf: + - type: integer + - type: 'null' updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. + anyOf: + - type: integer + - type: 'null' chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. + anyOf: + - type: string + - type: 'null' chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. + anyOf: + - type: string + - type: 'null' chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. + anyOf: + - type: string + - type: 'null' chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. + anyOf: + - type: integer + - type: 'null' content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. + anyOf: + - type: integer + - type: 'null' metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. - additionalProperties: false - title: ChunkMetadata - description: >- - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional - information about the chunk that will not be used in the context during - inference, but is required for backend functionality. The `ChunkMetadata` is - set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not - expected to change after. Use `Chunk.metadata` for metadata that will - be used in the context during inference. - InsertChunksRequest: + anyOf: + - type: integer + - type: 'null' type: object + title: ChunkMetadata + description: |- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + InsertChunksRequest: properties: vector_store_id: type: string - description: >- - The identifier of the vector database to insert the chunks into. + title: Vector Store Id chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - The chunks to insert. Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` - and `embedding`: `List[float]` are optional. If `metadata` is provided, - you configure how Llama Stack formats the chunk during generation. If - `embedding` is not provided, it will be computed later. + $ref: '#/components/schemas/Chunk-Input' + type: array + title: Chunks ttl_seconds: - type: integer - description: The time to live of the chunks. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - vector_store_id - - chunks + - vector_store_id + - chunks title: InsertChunksRequest QueryChunksRequest: - type: object properties: vector_store_id: type: string - description: >- - The identifier of the vector database to query. + title: Vector Store Id query: - $ref: '#/components/schemas/InterleavedContent' - description: The query to search for. 
+ anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the query. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - vector_store_id - - query + - vector_store_id + - query title: QueryChunksRequest QueryChunksResponse: - type: object properties: chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: + $ref: '#/components/schemas/Chunk-Output' type: array + title: Chunks + scores: items: type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - VectorStoreFileCounts: + type: array + title: Scores type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: properties: completed: type: integer - description: >- - Number of files that have been successfully processed + title: Completed cancelled: type: integer - description: >- - Number of files that had their processing cancelled + title: Cancelled failed: type: integer - description: Number of files that failed to process + title: Failed in_progress: type: integer - description: >- - Number of files currently being processed + title: In Progress total: type: integer - description: >- - Total number of files in the vector store - additionalProperties: false - required: - - completed - - cancelled - - failed - - in_progress - - total - title: VectorStoreFileCounts - description: >- - File processing status counts for a vector store. - VectorStoreListResponse: + title: Total type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. 
+ VectorStoreListResponse: properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreObject' - description: List of vector store objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first vector store in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last vector store in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more vector stores available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreListResponse description: Response from listing vector stores. VectorStoreObject: - type: object properties: id: type: string - description: Unique identifier for the vector store + title: Id object: type: string + title: Object default: vector_store - description: >- - Object type identifier, always "vector_store" created_at: type: integer - description: >- - Timestamp when the vector store was created + title: Created At name: - type: string - description: (Optional) Name of the vector store + anyOf: + - type: string + - type: 'null' usage_bytes: type: integer + title: Usage Bytes default: 0 - description: >- - Storage space used by the vector store in bytes file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the vector store status: type: string + title: Status default: completed - description: Current status of the vector store expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' expires_at: - type: integer - description: >- - (Optional) Timestamp when the vector store will expire + anyOf: + - type: integer + - type: 'null' last_active_at: - type: integer - description: >- - (Optional) Timestamp of last activity on the vector store + anyOf: + - type: integer + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false + title: Metadata + type: object required: - - id - - object - - created_at - - usage_bytes - - file_counts - - status - - metadata + - id + - created_at + - file_counts title: VectorStoreObject description: OpenAI Vector Store object. 
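+  # Minimal VectorStoreObject covering only the required fields (the id and
+  # timestamp are invented; object and status fall back to their defaults):
+  #   id: vs_123
+  #   created_at: 1700000000
+  #   file_counts: {completed: 0, cancelled: 0, failed: 0, in_progress: 0, total: 0}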
VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' discriminator: - propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic VectorStoreChunkingStrategyAuto: - type: object properties: type: type: string const: auto + title: Type default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: properties: type: type: string const: static + title: Type default: static - description: >- - Strategy type, always "static" for static chunking static: $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: properties: chunk_overlap_tokens: type: integer + title: Chunk Overlap Tokens default: 400 - description: >- - Number of tokens to overlap between adjacent chunks max_chunk_size_tokens: type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens + type: object title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - "OpenAICreateVectorStoreRequestWithExtraBody": - type: object + description: Configuration for static chunking strategy. 
+ OpenAICreateVectorStoreRequestWithExtraBody: properties: name: - type: string - description: (Optional) A name for the vector store + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store + anyOf: + - items: + type: string + type: array + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) Strategy for splitting files into chunks + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: properties: name: - type: string - description: The name of the vector store. + anyOf: + - type: string + - type: 'null' expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object title: OpenaiUpdateVectorStoreRequest VectorStoreDeleteResponse: - type: object properties: id: type: string - description: >- - Unique identifier of the deleted vector store + title: Id object: type: string + title: Object default: vector_store.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: VectorStoreDeleteResponse description: Response from deleting a vector store. 
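+  # Sketch of an OpenAICreateVectorStoreRequestWithExtraBody payload; since
+  # additionalProperties is true, provider-specific extra_body keys may also
+  # be supplied (the name and file id are invented):
+  #   name: my-store
+  #   file_ids: [file_abc]
+  #   chunking_strategy: {type: auto}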
- "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: properties: file_ids: - type: array items: type: string - description: >- - A list of File IDs that the vector store should use + type: array + title: File Ids attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. 
VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + title: Attributes chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. 
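+  # Minimal VectorStoreFileObject with only the required fields (ids and the
+  # timestamp are invented; status shows its default value):
+  #   id: file_abc
+  #   created_at: 1700000000
+  #   status: completed
+  #   chunking_strategy: {type: auto}
+  #   vector_store_id: vs_123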
VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. 
- additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. - bool: - type: boolean - VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content + title: Text embedding: - type: array - items: - type: number - description: >- - Optional embedding vector for this content chunk + anyOf: + - items: + type: number + type: array + - type: 'null' chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: Optional chunk metadata + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Optional user-defined metadata - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. 
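+  # VectorStoreContent above currently only carries text; a minimal instance
+  # (the text value is illustrative):
+  #   type: text
+  #   text: "retrieved chunk text"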
+ VectorStoreFileContentResponse: properties: object: type: string const: vector_store.file_content.page + title: Object default: vector_store.file_content.page - description: >- - The object type, which is always `vector_store.file_content.page` data: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: Parsed content of the file + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Indicates if there are more content pages to fetch next_page: - type: string - description: The token for the next page, if any - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreFileContentResponse - description: >- - Represents the parsed content of a vector store file. - OpenaiSearchVectorStoreRequest: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. + OpenaiSearchVectorStoreRequest: properties: query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: + anyOf: + - type: string + - items: type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
+ type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) + anyOf: + - type: boolean + - type: 'null' + default: false search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false + anyOf: + - type: string + - type: 'null' + default: vector + type: object required: - - query + - query title: OpenaiSearchVectorStoreRequest VectorStoreSearchResponse: - type: object properties: file_id: type: string - description: >- - Unique identifier of the file containing the result + title: File Id filename: type: string - description: Name of the file containing the result + title: Filename score: type: number - description: Relevance score for this search result + title: Score attributes: - type: object - additionalProperties: - oneOf: + anyOf: + - additionalProperties: + anyOf: - type: string - type: number - type: boolean - description: >- - (Optional) Key-value attributes associated with the file + title: string | number | boolean + type: object + - type: 'null' content: - type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false + type: array + title: Content + type: object required: - - file_id - - filename - - score - - content + - file_id + - filename + - score + - content title: VectorStoreSearchResponse description: Response from searching a vector store. VectorStoreSearchResponsePage: - type: object properties: object: type: string + title: Object default: vector_store.search_results.page - description: >- - Object type identifier for the search results page search_query: - type: array items: type: string - description: >- - The original search query that was executed - data: type: array + title: Search Query + data: items: $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - VersionInfo: + anyOf: + - type: string + - type: 'null' type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: properties: version: type: string - description: Version number of the service - additionalProperties: false + title: Version + type: object required: - - version + - version title: VersionInfo description: Version information for the service. 
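+  # Sketch of an OpenaiSearchVectorStoreRequest relying on the defaults shown
+  # above (the query string is invented):
+  #   query: "what is the refund policy?"
+  #   max_num_results: 10
+  #   search_mode: vector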
AppendRowsRequest: - type: object properties: rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. - additionalProperties: false + type: array + title: Rows + type: object required: - - rows + - rows title: AppendRowsRequest PaginatedResponse: - type: object properties: data: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more items available after this set + title: Has More url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Dataset: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. + Dataset: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: dataset + title: Type default: dataset - description: >- - Type of resource, always 'dataset' for datasets purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use + $ref: '#/components/schemas/DatasetPurpose' source: oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource discriminator: propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset + uri: '#/components/schemas/URIDataSource' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: + title: Metadata + description: Any additional metadata for this dataset type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. 
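+  # Sketch of a Dataset backed by a URI source (identifier, provider id and
+  # URI are invented; purpose must be a DatasetPurpose value such as
+  # eval/question-answer):
+  #   identifier: my-dataset
+  #   provider_id: localfs
+  #   purpose: eval/question-answer
+  #   source: {type: uri, uri: "https://example.com/data.jsonl"}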
+ RowsDataSource: properties: type: type: string const: rows + title: Type default: rows rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false + type: array + title: Rows + type: object required: - - type - - rows + - rows title: RowsDataSource description: A dataset stored in rows. URIDataSource: - type: object properties: type: type: string const: uri + title: Type default: uri uri: type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. - ListDatasetsResponse: + title: Uri type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListDatasetsResponse description: Response from listing datasets. Benchmark: - type: object properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: benchmark + title: Type default: benchmark - description: The resource type, always benchmark dataset_id: type: string - description: >- - Identifier of the dataset to use for the benchmark evaluation + title: Dataset Id scoring_functions: - type: array items: type: string - description: >- - List of scoring function identifiers to apply during evaluation + type: array + title: Scoring Functions metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Metadata for this evaluation task - additionalProperties: false - required: - - identifier - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - description: >- - A benchmark resource for evaluating model performance. - ListBenchmarksResponse: type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. + ListBenchmarksResponse: properties: data: - type: array items: $ref: '#/components/schemas/Benchmark' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListBenchmarksResponse BenchmarkConfig: - type: object properties: eval_candidate: $ref: '#/components/schemas/ModelCandidate' - description: The candidate to evaluate. 
      scoring_params:
-        type: object
         additionalProperties:
-          $ref: '#/components/schemas/ScoringFnParams'
-        description: >-
-          Map between scoring function id and parameters for each scoring function
-          you want to run
+          oneOf:
+          - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+            title: LLMAsJudgeScoringFnParams
+          - $ref: '#/components/schemas/RegexParserScoringFnParams'
+            title: RegexParserScoringFnParams
+          - $ref: '#/components/schemas/BasicScoringFnParams'
+            title: BasicScoringFnParams
+          discriminator:
+            propertyName: type
+            mapping:
+              basic: '#/components/schemas/BasicScoringFnParams'
+              llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+          title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          type: object
+        title: Scoring Params
+        description: Map between scoring function id and parameters for each scoring function you want to run
       num_examples:
-        type: integer
-        description: >-
-          (Optional) The number of examples to evaluate. If not provided, all examples
-          in the dataset will be evaluated
-      additionalProperties: false
-      required:
-        - eval_candidate
-        - scoring_params
-      title: BenchmarkConfig
-      description: >-
-        A benchmark configuration for evaluation.
-    GreedySamplingStrategy:
+        anyOf:
+        - type: integer
+        - type: 'null'
+        description: Number of examples to evaluate (useful for testing); if not provided, all examples in the dataset will be evaluated
       type: object
+      required:
+      - eval_candidate
+      title: BenchmarkConfig
+      description: A benchmark configuration for evaluation.
+    GreedySamplingStrategy:
       properties:
         type:
           type: string
           const: greedy
+          title: Type
           default: greedy
-          description: >-
-            Must be "greedy" to identify this sampling strategy
-      additionalProperties: false
-      required:
-        - type
-      title: GreedySamplingStrategy
-      description: >-
-        Greedy sampling strategy that selects the highest probability token at each
-        step.
-    ModelCandidate:
       type: object
+      title: GreedySamplingStrategy
+      description: Greedy sampling strategy that selects the highest probability token at each step.
+    ModelCandidate:
       properties:
         type:
           type: string
           const: model
+          title: Type
           default: model
         model:
           type: string
-          description: The model ID to evaluate.
+          title: Model
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
-          description: The sampling parameters for the model.
         system_message:
-          $ref: '#/components/schemas/SystemMessage'
-          description: >-
-            (Optional) The system message providing instructions or context to the
-            model.
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/SystemMessage'
+            title: SystemMessage
+          - type: 'null'
+          title: SystemMessage
+      type: object
       required:
-        - type
-        - model
-        - sampling_params
+      - model
+      - sampling_params
       title: ModelCandidate
       description: A model candidate for evaluation.
SamplingParams: - type: object properties: strategy: oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy discriminator: propertyName: type mapping: greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' top_k: '#/components/schemas/TopKSamplingStrategy' - description: The sampling strategy. + top_p: '#/components/schemas/TopPSamplingStrategy' max_tokens: - type: integer - description: >- - The maximum number of tokens that can be generated in the completion. - The token count of your prompt plus max_tokens cannot exceed the model's - context length. + anyOf: + - type: integer + - type: 'null' repetition_penalty: - type: number + anyOf: + - type: number + - type: 'null' default: 1.0 - description: >- - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's likelihood - to talk about new topics. stop: - type: array - items: - type: string - description: >- - Up to 4 sequences where the API will stop generating further tokens. The - returned text will not contain the stop sequence. - additionalProperties: false - required: - - strategy + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object title: SamplingParams description: Sampling parameters. SystemMessage: - type: object properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. - TopKSamplingStrategy: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. 
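
For readers tracking the new anyOf/discriminator style, a minimal hand-written SamplingParams instance using the top_p strategy might look like this; all values are illustrative, not defaults taken from the spec:

    # Illustrative SamplingParams payload; per the new schema, temperature is
    # required when the top_p strategy is selected.
    strategy:
      type: top_p
      temperature: 0.7
      top_p: 0.95
    max_tokens: 512
    stop:
    - '###'
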
+ TopKSamplingStrategy: properties: type: type: string const: top_k + title: Type default: top_k - description: >- - Must be "top_k" to identify this sampling strategy top_k: type: integer - description: >- - Number of top tokens to consider for sampling. Must be at least 1 - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - description: >- - Top-k sampling strategy that restricts sampling to the k most likely tokens. - TopPSamplingStrategy: + minimum: 1.0 + title: Top K type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: properties: type: type: string const: top_p + title: Type default: top_p - description: >- - Must be "top_p" to identify this sampling strategy temperature: - type: number - description: >- - Controls randomness in sampling. Higher values increase randomness + anyOf: + - type: number + minimum: 0.0 + - type: 'null' top_p: - type: number + anyOf: + - type: number + - type: 'null' default: 0.95 - description: >- - Cumulative probability threshold for nucleus sampling. Defaults to 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - description: >- - Top-p (nucleus) sampling strategy that samples from the smallest set of tokens - with cumulative probability >= p. - EvaluateRowsRequest: type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: type: array + title: Input Rows + scoring_functions: items: type: string - description: >- - The scoring functions to use for the evaluation. + type: array + title: Scoring Functions benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - input_rows - - scoring_functions - - benchmark_config + - input_rows + - scoring_functions + - benchmark_config title: EvaluateRowsRequest EvaluateResponse: - type: object properties: generations: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. + type: array + title: Generations scores: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false + type: object + title: Scores + type: object required: - - generations - - scores + - generations + - scores title: EvaluateResponse description: The response from an evaluation. - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. 
- additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest Job: - type: object properties: job_id: type: string - description: Unique identifier for the job + title: Job Id status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - RerankRequest: + $ref: '#/components/schemas/JobStatus' type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankRequest: properties: model: type: string - description: >- - The identifier of the reranking model to use. + title: Model query: - oneOf: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: - type: string - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam type: array - items: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. + title: Items max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - model - - query - - items + - model + - query + - items title: RerankRequest RerankData: - type: object properties: index: type: integer - description: >- - The original index of the document in the input list + title: Index relevance_score: type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Relevance Score type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: properties: data: - type: array items: $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: RerankResponse description: Response from a reranking request. 
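
As a worked example of the rerank schemas above, a request and a matching response might look like the following; the model id, documents, and score are hypothetical:

    # Illustrative RerankRequest body
    model: example-reranker
    query: What is the capital of France?
    items:
    - Paris is the capital of France.
    - Berlin is the capital of Germany.
    max_num_results: 1
    # Illustrative RerankResponse body (results sorted by relevance score, descending)
    data:
    - index: 0
      relevance_score: 0.97
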
Checkpoint: - type: object properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. 
- additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. + description: Configuration for Direct Preference Optimization (DPO) alignment. 
DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. + description: Configuration parameters for the optimization algorithm. 
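
To make the nullable-with-default pattern above concrete, minimal DataConfig and OptimizerConfig instances might look like this; the dataset id and hyperparameter values are hypothetical:

    # Illustrative DataConfig instance (required fields plus one optional flag)
    dataset_id: my-training-dataset
    batch_size: 8
    shuffle: true
    data_format: instruct   # enum: instruct | dialog
    packed: false           # optional; defaults to false
    # Illustrative OptimizerConfig instance (all four fields are required)
    optimizer_type: adamw   # enum: adam | adamw | sgd
    lr: 0.0001
    weight_decay: 0.01
    num_warmup_steps: 100
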
OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
+    SupervisedFineTuneRequest:
       properties:
         job_uuid:
           type: string
-          description: The UUID of the job to create.
+          title: Job Uuid
         training_config:
           $ref: '#/components/schemas/TrainingConfig'
-          description: The training configuration.
         hyperparam_search_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The hyperparam search configuration.
+          title: Hyperparam Search Config
         logger_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The logger configuration.
+          title: Logger Config
         model:
-          type: string
-          description: The model to fine-tune.
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Model descriptor for training if not in provider config
         checkpoint_dir:
-          type: string
-          description: The directory to save checkpoint(s) to.
+          anyOf:
+          - type: string
+          - type: 'null'
         algorithm_config:
-          $ref: '#/components/schemas/AlgorithmConfig'
-          description: The algorithm configuration.
-      additionalProperties: false
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LoraFinetuningConfig'
+              title: LoraFinetuningConfig
+            - $ref: '#/components/schemas/QATFinetuningConfig'
+              title: QATFinetuningConfig
+            discriminator:
+              propertyName: type
+              mapping:
+                LoRA: '#/components/schemas/LoraFinetuningConfig'
+                QAT: '#/components/schemas/QATFinetuningConfig'
+            title: LoraFinetuningConfig | QATFinetuningConfig
+          - type: 'null'
+          title: Algorithm Config
+      type: object
       required:
-        - job_uuid
-        - training_config
-        - hyperparam_search_config
-        - logger_config
+      - job_uuid
+      - training_config
+      - hyperparam_search_config
+      - logger_config
       title: SupervisedFineTuneRequest
     RegisterModelRequest:
-      type: object
       properties:
         model_id:
           type: string
-          description: The identifier of the model to register.
+          title: Model Id
         provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
+          anyOf:
+          - type: string
+          - type: 'null'
         provider_id:
-          type: string
-          description: The identifier of the provider.
+          anyOf:
+          - type: string
+          - type: 'null'
         metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
- additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + title: ModelType + type: object required: - - model_id + - model_id title: RegisterModelRequest ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' discriminator: - propertyName: type mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' + boolean: '#/components/schemas/BooleanType' chat_completion_input: '#/components/schemas/ChatCompletionInputType' completion_input: '#/components/schemas/CompletionInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) RegisterShieldRequest: - type: object properties: shield_id: type: string - description: >- - The identifier of the shield to register. + title: Shield Id provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. + anyOf: + - type: string + - type: 'null' provider_id: - type: string - description: The identifier of the provider. 
+ anyOf: + - type: string + - type: 'null' params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - shield_id + - shield_id title: RegisterShieldRequest RegisterToolGroupRequest: - type: object properties: toolgroup_id: type: string - description: The ID of the tool group to register. + title: Toolgroup Id provider_id: type: string - description: >- - The ID of the provider to use for the tool group. + title: Provider Id mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - toolgroup_id - - provider_id + - toolgroup_id + - provider_id title: RegisterToolGroupRequest DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' discriminator: - propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. 
If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource RegisterBenchmarkRequest: - type: object properties: benchmark_id: type: string - description: The ID of the benchmark to register. + title: Benchmark Id dataset_id: type: string - description: >- - The ID of the dataset to use for the benchmark. + title: Dataset Id scoring_functions: - type: array items: type: string - description: >- - The scoring functions to use for the benchmark. + type: array + title: Scoring Functions provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. + anyOf: + - type: string + - type: 'null' provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. + anyOf: + - type: string + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - benchmark_id - - dataset_id - - scoring_functions + - benchmark_id + - dataset_id + - scoring_functions title: RegisterBenchmarkRequest + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. 
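
A hand-written sketch of the two MCP filter schemas above; the tool names are hypothetical, and the pairing shown is one plausible reading of the always/never lists:

    # Illustrative AllowedToolsFilter instance: restrict the MCP server to two tools
    tool_names:
    - search_docs
    - fetch_page
    # Illustrative ApprovalFilter instance: always require approval for fetch_page,
    # never for search_docs
    always:
    - fetch_page
    never:
    - search_docs
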
+ BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
+    Chunk-Output:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Output'
+                title: ImageContentItem-Output
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Output'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Output | TextContentItem
+            type: array
+            title: list[ImageContentItem-Output | TextContentItem]
+          title: string | list[ImageContentItem-Output | TextContentItem]
+        chunk_id:
+          type: string
+          title: Chunk Id
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          title: ChunkMetadata
+      type: object
+      required:
+      - content
+      - chunk_id
+      title: Chunk
+      description: A chunk of content that can be inserted into a vector database.
+    ConversationItemInclude:
+      type: string
+      enum:
+      - web_search_call.action.sources
+      - code_interpreter_call.outputs
+      - computer_call_output.output.image_url
+      - file_search_call.results
+      - message.input_image.image_url
+      - message.output_text.logprobs
+      - reasoning.encrypted_content
+      title: ConversationItemInclude
+      description: Specify additional output data to include in the model response.
+    DatasetPurpose:
+      type: string
+      enum:
+      - post-training/messages
+      - eval/question-answer
+      - eval/messages-answer
+      title: DatasetPurpose
+      description: Purpose of the dataset. Each purpose has a required input data schema.
+    Errors:
+      properties:
+        data:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/BatchError'
+            type: array
+          - type: 'null'
+        object:
+          anyOf:
+          - type: string
+          - type: 'null'
+      additionalProperties: true
+      type: object
+      title: Errors
+    HealthStatus:
+      type: string
+      enum:
+      - OK
+      - Error
+      - Not Implemented
+      title: HealthStatus
+    ImageContentItem-Input:
+      properties:
+        type:
+          type: string
+          const: image
+          title: Type
+          default: image
+        image:
+          $ref: '#/components/schemas/_URLOrData'
+      type: object
+      required:
+      - image
+      title: ImageContentItem
+      description: An image content item
+    ImageContentItem-Output:
+      properties:
+        type:
+          type: string
+          const: image
+          title: Type
+          default: image
+        image:
+          $ref: '#/components/schemas/_URLOrData'
+      type: object
+      required:
+      - image
+      title: ImageContentItem
+      description: An image content item
+    InputTokensDetails:
+      properties:
+        cached_tokens:
+          type: integer
+          title: Cached Tokens
+      additionalProperties: true
+      type: object
+      required:
+      - cached_tokens
+      title: InputTokensDetails
+    JobStatus:
+      type: string
+      enum:
+      - completed
+      - in_progress
+      - failed
+      - scheduled
+      - cancelled
+      title: JobStatus
+      description: Status of a job execution.
+ MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. 
+ OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
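`OpenAIUserMessageParam` likewise takes `content` as a string or as a list of text / image / file parts keyed by `type`. A minimal sketch; the inner shape of the image part follows the usual OpenAI chat-completions convention and is an assumption here, since only the discriminator mapping appears above:

```python
# Hypothetical OpenAIUserMessageParam payloads. "role" is const "user"
# (and the default); only "content" is required.
user_message = {
    "role": "user",
    "name": "alice",  # optional / nullable
    "content": [
        {"type": "text", "text": "Describe this picture."},
        # Inner fields assumed from the usual OpenAI shape.
        {"type": "image_url", "image_url": {"url": "https://example.com/dog.png"}},
    ],
}

# The plain-string short form is equally valid per the anyOf.
short_form = {"role": "user", "content": "Hello!"}

assert "content" in user_message and "content" in short_form
```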
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + RegisterDatasetRequestLoose: + properties: + purpose: + title: Purpose + source: + title: Source + metadata: + title: Metadata + dataset_id: + title: Dataset Id + type: object + required: + - purpose + - source + title: RegisterDatasetRequestLoose + RegisterScoringFunctionRequestLoose: + properties: + scoring_fn_id: + title: Scoring Fn Id + description: + title: Description + return_type: + title: Return Type + provider_scoring_fn_id: + title: Provider Scoring Fn Id + provider_id: + title: Provider Id + params: + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequestLoose + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. 
+ properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + SpanEndPayload: + description: Payload for a span end event. + properties: + type: + const: span_end + default: span_end + title: Type + type: string + status: + $ref: '#/components/schemas/SpanStatus' + required: + - status + title: SpanEndPayload + type: object + SpanStartPayload: + description: Payload for a span start event. + properties: + type: + const: span_start + default: span_start + title: Type + type: string + name: + title: Name + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - name + title: SpanStartPayload + type: object + SpanStatus: + description: The status of a span indicating whether it completed successfully or with an error. + enum: + - ok + - error + title: SpanStatus + type: string + StructuredLogPayload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + LogSeverity: + description: The severity level of a log message. + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + type: string + MetricEvent: + description: A metric event containing a measured value. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... 
(4 variants) + type: object + - type: 'null' + type: + const: metric + default: metric + title: Type + type: string + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + title: Unit + type: string + required: + - trace_id + - span_id + - timestamp + - metric + - value + - unit + title: MetricEvent + type: object + StructuredLogEvent: + description: A structured log event containing typed payload data. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: structured_log + default: structured_log + title: Type + type: string + payload: + discriminator: + mapping: + span_end: '#/components/schemas/SpanEndPayload' + span_start: '#/components/schemas/SpanStartPayload' + propertyName: type + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + title: SpanStartPayload + - $ref: '#/components/schemas/SpanEndPayload' + title: SpanEndPayload + title: SpanStartPayload | SpanEndPayload + required: + - trace_id + - span_id + - timestamp + - payload + title: StructuredLogEvent + type: object + UnstructuredLogEvent: + description: An unstructured log event containing a simple text message. + properties: + trace_id: + title: Trace Id + type: string + span_id: + title: Span Id + type: string + timestamp: + format: date-time + title: Timestamp + type: string + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + title: string | ... (4 variants) + type: object + - type: 'null' + type: + const: unstructured_log + default: unstructured_log + title: Type + type: string + message: + title: Message + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + required: + - trace_id + - span_id + - timestamp + - message + - severity + title: UnstructuredLogEvent + type: object + Event: + discriminator: + mapping: + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + propertyName: type + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + title: UnstructuredLogEvent + - $ref: '#/components/schemas/MetricEvent' + title: MetricEvent + - $ref: '#/components/schemas/StructuredLogEvent' + title: StructuredLogEvent + title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. 
+ properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. 
+ properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. 
+ properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: 
OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. + enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. 
+ title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. + properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object + Span: + description: A span representing a single operation within a trace. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + required: + - span_id + - trace_id + - name + - start_time + title: Span + type: object + Trace: + description: A trace representing the complete execution path of a request across multiple operations. 
+ properties: + trace_id: + title: Trace Id + type: string + root_span_id: + title: Root Span Id + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + required: + - trace_id + - root_span_id + - start_time + title: Trace + type: object + EventType: + description: The type of telemetry event being logged. + enum: + - unstructured_log + - structured_log + - metric + title: EventType + type: string + StructuredLogType: + description: The type of structured log event payload. + enum: + - span_start + - span_end + title: StructuredLogType + type: string + EvalTrace: + description: A trace record for evaluation purposes. + properties: + session_id: + title: Session Id + type: string + step: + title: Step + type: string + input: + title: Input + type: string + output: + title: Output + type: string + expected_output: + title: Expected Output + type: string + required: + - session_id + - step + - input + - output + - expected_output + title: EvalTrace + type: object + SpanWithStatus: + description: A span that includes status information. + properties: + span_id: + title: Span Id + type: string + trace_id: + title: Trace Id + type: string + parent_span_id: + anyOf: + - type: string + - type: 'null' + nullable: true + name: + title: Name + type: string + start_time: + format: date-time + title: Start Time + type: string + end_time: + anyOf: + - format: date-time + type: string + - type: 'null' + nullable: true + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/SpanStatus' + title: SpanStatus + - type: 'null' + nullable: true + title: SpanStatus + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + type: object + QueryConditionOp: + description: Comparison operators for query conditions. + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + type: string + QueryCondition: + description: A condition for filtering query results. + properties: + key: + title: Key + type: string + op: + $ref: '#/components/schemas/QueryConditionOp' + value: + title: Value + required: + - key + - op + - value + title: QueryCondition + type: object + MetricLabel: + description: A label associated with a metric. + properties: + name: + title: Name + type: string + value: + title: Value + type: string + required: + - name + - value + title: MetricLabel + type: object + MetricDataPoint: + description: A single data point in a metric time series. + properties: + timestamp: + title: Timestamp + type: integer + value: + title: Value + type: number + unit: + title: Unit + type: string + required: + - timestamp + - value + - unit + title: MetricDataPoint + type: object + MetricSeries: + description: A time series of metric data points. 
+ properties: + metric: + title: Metric + type: string + labels: + items: + $ref: '#/components/schemas/MetricLabel' + title: Labels + type: array + values: + items: + $ref: '#/components/schemas/MetricDataPoint' + title: Values + type: array + required: + - metric + - labels + - values + title: MetricSeries + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -11921,8 +13389,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -11930,11 +13397,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -11942,127 +13407,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. - x-displayName: Agents - - name: Batches - description: >- - The API is designed to allow use of openai client libraries for seamless integration. +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - This API provides the following extensions: - - idempotent batch creation + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - Note: This API is currently under active development and may undergo changes. - x-displayName: >- - The Batches API enables efficient processing of multiple requests in a single - operation, particularly useful for processing large datasets, batch evaluation - workflows, and cost-effective inference at scale. 
- - name: Benchmarks - description: '' - - name: Conversations - description: >- - Protocol for conversation management operations. - x-displayName: Conversations - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. - - - This API provides the raw interface to the underlying models. Three kinds of - models are supported: - - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - - - Rerank models: these models reorder the documents based on their relevance - to a query. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: PostTraining (Coming Soon) - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Batches - - Benchmarks - - Conversations - - DatasetIO - - Datasets - - Eval - - Files - - Inference - - Inspect - - Models - - PostTraining (Coming Soon) - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/pyproject.toml b/pyproject.toml index f6d28fd03..bdf8309ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "httpx", "jinja2>=3.1.6", "jsonschema", - "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) + "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) "openai>=2.5.0", "prompt-toolkit", "python-dotenv", @@ -50,12 +50,13 @@ dependencies = [ "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "pyyaml>=6.0.2", "starlette>=0.49.1", ] [project.optional-dependencies] client = [ - "llama-stack-client>=0.3.0", # Optional for library-only usage + "llama-stack-client>=0.3.0", # Optional for library-only usage ] [dependency-groups] @@ -66,13 +67,14 @@ dev = [ "pytest-cov", "pytest-html", "pytest-json-report", - "pytest-socket", # For blocking network access in unit tests - "nbval", # For notebook testing + "pytest-socket", # For blocking network access in unit tests + "nbval", # For notebook testing "black", "ruff", "mypy", "pre-commit>=4.4.0", - "ruamel.yaml", # needed for openapi generator + "ruamel.yaml", # needed for openapi generator + "openapi-spec-validator>=0.7.2", ] # Type checking dependencies - includes type stubs and optional runtime dependencies # needed for complete mypy coverage across all optional features @@ -182,7 +184,12 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*", "llama_stack_api", "llama_stack_api.*"] +include = [ + "llama_stack", + "llama_stack.*", + "llama_stack_api", + "llama_stack_api.*", +] [[tool.uv.index]] name = "pytorch-cpu" @@ -249,7 +256,9 @@ unfixable = [ # Ignore the following errors for the following files [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests -"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"] +"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = [ + "RUF001", +] "src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [ "RUF001", "PLE2515", @@ -341,7 +350,6 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama_stack_api/strong_typing/auxiliary\\.py$", 
"^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/openapi_generator/__init__.py b/scripts/openapi_generator/__init__.py new file mode 100644 index 000000000..7f6aaa1d1 --- /dev/null +++ b/scripts/openapi_generator/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +OpenAPI generator module for Llama Stack. + +This module provides functionality to generate OpenAPI specifications +from FastAPI applications. +""" + +from .main import generate_openapi_spec, main + +__all__ = ["generate_openapi_spec", "main"] diff --git a/docs/openapi_generator/pyopenapi/__init__.py b/scripts/openapi_generator/__main__.py similarity index 58% rename from docs/openapi_generator/pyopenapi/__init__.py rename to scripts/openapi_generator/__main__.py index 756f351d8..d857e5e7e 100644 --- a/docs/openapi_generator/pyopenapi/__init__.py +++ b/scripts/openapi_generator/__main__.py @@ -3,3 +3,12 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +""" +Entry point for running the openapi_generator module as a package. +""" + +from .main import main + +if __name__ == "__main__": + main() diff --git a/scripts/openapi_generator/_legacy_order.py b/scripts/openapi_generator/_legacy_order.py new file mode 100644 index 000000000..72863c8fc --- /dev/null +++ b/scripts/openapi_generator/_legacy_order.py @@ -0,0 +1,502 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Temporary ordering helpers extracted from origin/main client-sdks/stainless/openapi.yml. + +These lists help the new generator match the previous ordering so that diffs +remain readable while we debug schema content regressions. Remove once stable. 
+""" + +LEGACY_PATH_ORDER = [ + "/v1/batches", + "/v1/batches/{batch_id}", + "/v1/batches/{batch_id}/cancel", + "/v1/chat/completions", + "/v1/chat/completions/{completion_id}", + "/v1/completions", + "/v1/conversations", + "/v1/conversations/{conversation_id}", + "/v1/conversations/{conversation_id}/items", + "/v1/conversations/{conversation_id}/items/{item_id}", + "/v1/embeddings", + "/v1/files", + "/v1/files/{file_id}", + "/v1/files/{file_id}/content", + "/v1/health", + "/v1/inspect/routes", + "/v1/models", + "/v1/models/{model_id}", + "/v1/moderations", + "/v1/prompts", + "/v1/prompts/{prompt_id}", + "/v1/prompts/{prompt_id}/set-default-version", + "/v1/prompts/{prompt_id}/versions", + "/v1/providers", + "/v1/providers/{provider_id}", + "/v1/responses", + "/v1/responses/{response_id}", + "/v1/responses/{response_id}/input_items", + "/v1/safety/run-shield", + "/v1/scoring-functions", + "/v1/scoring-functions/{scoring_fn_id}", + "/v1/scoring/score", + "/v1/scoring/score-batch", + "/v1/shields", + "/v1/shields/{identifier}", + "/v1/tool-runtime/invoke", + "/v1/tool-runtime/list-tools", + "/v1/toolgroups", + "/v1/toolgroups/{toolgroup_id}", + "/v1/tools", + "/v1/tools/{tool_name}", + "/v1/vector-io/insert", + "/v1/vector-io/query", + "/v1/vector_stores", + "/v1/vector_stores/{vector_store_id}", + "/v1/vector_stores/{vector_store_id}/file_batches", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + "/v1/vector_stores/{vector_store_id}/files", + "/v1/vector_stores/{vector_store_id}/files/{file_id}", + "/v1/vector_stores/{vector_store_id}/files/{file_id}/content", + "/v1/vector_stores/{vector_store_id}/search", + "/v1/version", + "/v1beta/datasetio/append-rows/{dataset_id}", + "/v1beta/datasetio/iterrows/{dataset_id}", + "/v1beta/datasets", + "/v1beta/datasets/{dataset_id}", + "/v1alpha/eval/benchmarks", + "/v1alpha/eval/benchmarks/{benchmark_id}", + "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + "/v1alpha/inference/rerank", + "/v1alpha/post-training/job/artifacts", + "/v1alpha/post-training/job/cancel", + "/v1alpha/post-training/job/status", + "/v1alpha/post-training/jobs", + "/v1alpha/post-training/preference-optimize", + "/v1alpha/post-training/supervised-fine-tune", +] + +LEGACY_SCHEMA_ORDER = [ + "Error", + "ListBatchesResponse", + "CreateBatchRequest", + "Batch", + "Order", + "ListOpenAIChatCompletionResponse", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChoice", + "OpenAIChoiceLogprobs", + "OpenAIDeveloperMessageParam", + "OpenAIFile", + "OpenAIFileFile", + "OpenAIImageURL", + "OpenAIMessageParam", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OpenAIJSONSchema", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChoiceDelta", + 
"OpenAIChunkChoice", + "OpenAICompletionWithInputMessages", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletion", + "OpenAICompletionChoice", + "ConversationItem", + "OpenAIResponseAnnotationCitation", + "OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + "OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "CreateConversationRequest", + "Conversation", + "UpdateConversationRequest", + "ConversationDeletedResource", + "ConversationItemList", + "AddItemsRequest", + "ConversationItemDeletedResource", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsResponse", + "OpenAIFilePurpose", + "ListOpenAIFileResponse", + "OpenAIFileObject", + "ExpiresAfter", + "OpenAIFileDeleteResponse", + "Response", + "HealthInfo", + "RouteInfo", + "ListRoutesResponse", + "OpenAIModel", + "OpenAIListModelsResponse", + "Model", + "ModelType", + "RunModerationRequest", + "ModerationObject", + "ModerationObjectResults", + "Prompt", + "ListPromptsResponse", + "CreatePromptRequest", + "UpdatePromptRequest", + "SetDefaultVersionRequest", + "ProviderInfo", + "ListProvidersResponse", + "ListOpenAIResponseObject", + "OpenAIResponseError", + "OpenAIResponseInput", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "ResponseGuardrailSpec", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolMCP", + "CreateOpenaiResponseRequest", + "OpenAIResponseObject", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + 
"OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIDeleteResponseObject", + "ListOpenAIResponseInputItem", + "RunShieldRequest", + "RunShieldResponse", + "SafetyViolation", + "ViolationLevel", + "AggregationFunctionType", + "ArrayType", + "BasicScoringFnParams", + "BooleanType", + "ChatCompletionInputType", + "CompletionInputType", + "JsonType", + "LLMAsJudgeScoringFnParams", + "NumberType", + "ObjectType", + "RegexParserScoringFnParams", + "ScoringFn", + "ScoringFnParams", + "ScoringFnParamsType", + "StringType", + "UnionType", + "ListScoringFunctionsResponse", + "ScoreRequest", + "ScoreResponse", + "ScoringResult", + "ScoreBatchRequest", + "ScoreBatchResponse", + "Shield", + "ListShieldsResponse", + "InvokeToolRequest", + "ImageContentItem", + "InterleavedContent", + "InterleavedContentItem", + "TextContentItem", + "ToolInvocationResult", + "URL", + "ToolDef", + "ListToolDefsResponse", + "ToolGroup", + "ListToolGroupsResponse", + "Chunk", + "ChunkMetadata", + "InsertChunksRequest", + "QueryChunksRequest", + "QueryChunksResponse", + "VectorStoreFileCounts", + "VectorStoreListResponse", + "VectorStoreObject", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenaiUpdateVectorStoreRequest", + "VectorStoreDeleteResponse", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "VectorStoreFileBatchObject", + "VectorStoreFileStatus", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFilesListInBatchResponse", + "VectorStoreListFilesResponse", + "OpenaiAttachFileToVectorStoreRequest", + "OpenaiUpdateVectorStoreFileRequest", + "VectorStoreFileDeleteResponse", + "bool", + "VectorStoreContent", + "VectorStoreFileContentResponse", + "OpenaiSearchVectorStoreRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VersionInfo", + "AppendRowsRequest", + "PaginatedResponse", + "Dataset", + "RowsDataSource", + "URIDataSource", + "ListDatasetsResponse", + "Benchmark", + "ListBenchmarksResponse", + "BenchmarkConfig", + "GreedySamplingStrategy", + "ModelCandidate", + "SamplingParams", + "SystemMessage", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "EvaluateRowsRequest", + "EvaluateResponse", + "RunEvalRequest", + "Job", + "RerankRequest", + "RerankData", + "RerankResponse", + "Checkpoint", + "PostTrainingJobArtifactsResponse", + 
"PostTrainingMetric", + "CancelTrainingJobRequest", + "PostTrainingJobStatusResponse", + "ListPostTrainingJobsResponse", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DatasetFormat", + "EfficiencyConfig", + "OptimizerConfig", + "OptimizerType", + "TrainingConfig", + "PreferenceOptimizeRequest", + "PostTrainingJob", + "AlgorithmConfig", + "LoraFinetuningConfig", + "QATFinetuningConfig", + "SupervisedFineTuneRequest", + "RegisterModelRequest", + "ParamType", + "RegisterScoringFunctionRequest", + "RegisterShieldRequest", + "RegisterToolGroupRequest", + "DataSource", + "RegisterDatasetRequest", + "RegisterBenchmarkRequest", +] + +LEGACY_RESPONSE_ORDER = ["BadRequest400", "TooManyRequests429", "InternalServerError500", "DefaultError"] + +LEGACY_TAGS = [ + { + "description": "APIs for creating and interacting with agentic systems.", + "name": "Agents", + "x-displayName": "Agents", + }, + { + "description": "The API is designed to allow use of openai client libraries for seamless integration.\n" + "\n" + "This API provides the following extensions:\n" + " - idempotent batch creation\n" + "\n" + "Note: This API is currently under active development and may undergo changes.", + "name": "Batches", + "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, " + "particularly useful for processing large datasets, batch evaluation workflows, and cost-effective " + "inference at scale.", + }, + {"description": "", "name": "Benchmarks"}, + { + "description": "Protocol for conversation management operations.", + "name": "Conversations", + "x-displayName": "Conversations", + }, + {"description": "", "name": "DatasetIO"}, + {"description": "", "name": "Datasets"}, + { + "description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.", + "name": "Eval", + "x-displayName": "Evaluations", + }, + { + "description": "This API is used to upload documents that can be used with other Llama Stack APIs.", + "name": "Files", + "x-displayName": "Files", + }, + { + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n" + "\n" + "This API provides the raw interface to the underlying models. 
Three kinds of models are supported:\n" + '- LLM models: these models generate "raw" and "chat" (conversational) completions.\n' + "- Embedding models: these models generate embeddings to be used for semantic search.\n" + "- Rerank models: these models reorder the documents based on their relevance to a query.", + "name": "Inference", + "x-displayName": "Inference", + }, + { + "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with " + "methods and implementing providers.", + "name": "Inspect", + "x-displayName": "Inspect", + }, + {"description": "", "name": "Models"}, + {"description": "", "name": "PostTraining (Coming Soon)"}, + {"description": "Protocol for prompt management operations.", "name": "Prompts", "x-displayName": "Prompts"}, + { + "description": "Providers API for inspecting, listing, and modifying providers and their configurations.", + "name": "Providers", + "x-displayName": "Providers", + }, + {"description": "OpenAI-compatible Moderations API.", "name": "Safety", "x-displayName": "Safety"}, + {"description": "", "name": "Scoring"}, + {"description": "", "name": "ScoringFunctions"}, + {"description": "", "name": "Shields"}, + {"description": "", "name": "ToolGroups"}, + {"description": "", "name": "ToolRuntime"}, + {"description": "", "name": "VectorIO"}, +] + +LEGACY_TAG_ORDER = [ + "Agents", + "Batches", + "Benchmarks", + "Conversations", + "DatasetIO", + "Datasets", + "Eval", + "Files", + "Inference", + "Inspect", + "Models", + "PostTraining (Coming Soon)", + "Prompts", + "Providers", + "Safety", + "Scoring", + "ScoringFunctions", + "Shields", + "ToolGroups", + "ToolRuntime", + "VectorIO", +] + +LEGACY_TAG_GROUPS = [ + { + "name": "Operations", + "tags": [ + "Agents", + "Batches", + "Benchmarks", + "Conversations", + "DatasetIO", + "Datasets", + "Eval", + "Files", + "Inference", + "Inspect", + "Models", + "PostTraining (Coming Soon)", + "Prompts", + "Providers", + "Safety", + "Scoring", + "ScoringFunctions", + "Shields", + "ToolGroups", + "ToolRuntime", + "VectorIO", + ], + } +] + +LEGACY_SECURITY = [{"Default": []}] + +LEGACY_OPERATION_KEYS = [ + "responses", + "tags", + "summary", + "description", + "operationId", + "parameters", + "requestBody", + "deprecated", +] diff --git a/scripts/openapi_generator/app.py b/scripts/openapi_generator/app.py new file mode 100644 index 000000000..d972889cd --- /dev/null +++ b/scripts/openapi_generator/app.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +FastAPI app creation for OpenAPI generation. +""" + +import inspect +from typing import Any + +from fastapi import FastAPI + +from llama_stack.core.resolver import api_protocol_map +from llama_stack_api import Api + +from .state import _protocol_methods_cache + + +def _get_protocol_method(api: Api, method_name: str) -> Any | None: + """ + Get a protocol method function by API and method name. + Uses caching to avoid repeated lookups. 
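+
+    Example (illustrative only; assumes the Inference protocol defines
+    this method):
+        _get_protocol_method(Api.inference, "openai_chat_completion")
+        returns the protocol coroutine, or None if no such method exists.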
+ + Args: + api: The API enum + method_name: The method name (function name) + + Returns: + The function object, or None if not found + """ + global _protocol_methods_cache + + if _protocol_methods_cache is None: + _protocol_methods_cache = {} + protocols = api_protocol_map() + from llama_stack_api.tools import SpecialToolGroup, ToolRuntime + + toolgroup_protocols = { + SpecialToolGroup.rag_tool: ToolRuntime, + } + + for api_key, protocol in protocols.items(): + method_map: dict[str, Any] = {} + protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) + for name, method in protocol_methods: + method_map[name] = method + + # Handle tool_runtime special case + if api_key == Api.tool_runtime: + for tool_group, sub_protocol in toolgroup_protocols.items(): + sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) + for name, method in sub_protocol_methods: + if hasattr(method, "__webmethod__"): + method_map[f"{tool_group.value}.{name}"] = method + + _protocol_methods_cache[api_key] = method_map + + return _protocol_methods_cache.get(api, {}).get(method_name) + + +def create_llama_stack_app() -> FastAPI: + """ + Create a FastAPI app that represents the Llama Stack API. + This uses the existing route discovery system to automatically find all routes. + """ + app = FastAPI( + title="Llama Stack API", + description="A comprehensive API for building and deploying AI applications", + version="1.0.0", + servers=[ + {"url": "http://any-hosted-llama-stack.com"}, + ], + ) + + # Get all API routes + from llama_stack.core.server.routes import get_all_api_routes + + api_routes = get_all_api_routes() + + # Create FastAPI routes from the discovered routes + from . import endpoints + + for api, routes in api_routes.items(): + for route, webmethod in routes: + # Convert the route to a FastAPI endpoint + endpoints._create_fastapi_endpoint(app, route, webmethod, api) + + return app diff --git a/scripts/openapi_generator/endpoints.py b/scripts/openapi_generator/endpoints.py new file mode 100644 index 000000000..39086f47f --- /dev/null +++ b/scripts/openapi_generator/endpoints.py @@ -0,0 +1,657 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Endpoint generation logic for FastAPI OpenAPI generation. +""" + +import inspect +import re +import types +import typing +from typing import Annotated, Any, get_args, get_origin + +from fastapi import FastAPI +from pydantic import Field, create_model + +from llama_stack.log import get_logger +from llama_stack_api import Api +from llama_stack_api.schema_utils import get_registered_schema_info + +from . 
import app as app_module +from .state import _extra_body_fields, register_dynamic_model + +logger = get_logger(name=__name__, category="core") + + +def _to_pascal_case(segment: str) -> str: + tokens = re.findall(r"[A-Za-z]+|\d+", segment) + return "".join(token.capitalize() for token in tokens if token) + + +def _compose_request_model_name(api: Api, method_name: str, variant: str | None = None) -> str: + """Generate a deterministic model name from the protocol method.""" + + def _to_pascal_from_snake(value: str) -> str: + return "".join(segment.capitalize() for segment in value.split("_") if segment) + + base_name = _to_pascal_from_snake(method_name) + if not base_name: + base_name = _to_pascal_case(api.value) + base_name = f"{base_name}Request" + if variant: + base_name = f"{base_name}{variant}" + return base_name + + +def _extract_path_parameters(path: str) -> list[dict[str, Any]]: + """Extract path parameters from a URL path and return them as OpenAPI parameter definitions.""" + matches = re.findall(r"\{([^}:]+)(?::[^}]+)?\}", path) + return [ + { + "name": param_name, + "in": "path", + "required": True, + "schema": {"type": "string"}, + "description": f"Path parameter: {param_name}", + } + for param_name in matches + ] + + +def _create_endpoint_with_request_model( + request_model: type, response_model: type | None, operation_description: str | None +): + """Create an endpoint function with a request body model.""" + + async def endpoint(request: request_model) -> response_model: + return response_model() if response_model else {} + + if operation_description: + endpoint.__doc__ = operation_description + return endpoint + + +def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_any: bool = False) -> dict[str, tuple]: + """Build field definitions for a Pydantic model from query parameters.""" + from typing import Any + + field_definitions = {} + for param_name, param_type, default_value in query_parameters: + if use_any: + field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) + continue + + base_type = param_type + extracted_field = None + if get_origin(param_type) is Annotated: + args = get_args(param_type) + if args: + base_type = args[0] + for arg in args[1:]: + if isinstance(arg, Field): + extracted_field = arg + break + + try: + if extracted_field: + field_definitions[param_name] = (base_type, extracted_field) + else: + field_definitions[param_name] = ( + base_type, + ... if default_value is inspect.Parameter.empty else default_value, + ) + except (TypeError, ValueError): + field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) + + # Ensure all parameters are included + expected_params = {name for name, _, _ in query_parameters} + missing = expected_params - set(field_definitions.keys()) + if missing: + for param_name, _, default_value in query_parameters: + if param_name in missing: + field_definitions[param_name] = ( + Any, + ... 
if default_value is inspect.Parameter.empty else default_value, + ) + + return field_definitions + + +def _create_dynamic_request_model( + api: Api, + webmethod, + method_name: str, + http_method: str, + query_parameters: list[tuple[str, type, Any]], + use_any: bool = False, + variant_suffix: str | None = None, +) -> type | None: + """Create a dynamic Pydantic model for request body.""" + try: + field_definitions = _build_field_definitions(query_parameters, use_any) + if not field_definitions: + return None + model_name = _compose_request_model_name(api, method_name, variant_suffix or None) + request_model = create_model(model_name, **field_definitions) + return register_dynamic_model(model_name, request_model) + except Exception: + return None + + +def _build_signature_params( + query_parameters: list[tuple[str, type, Any]], +) -> tuple[list[inspect.Parameter], dict[str, type]]: + """Build signature parameters and annotations from query parameters.""" + signature_params = [] + param_annotations = {} + for param_name, param_type, default_value in query_parameters: + param_annotations[param_name] = param_type + signature_params.append( + inspect.Parameter( + param_name, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=default_value if default_value is not inspect.Parameter.empty else inspect.Parameter.empty, + annotation=param_type, + ) + ) + return signature_params, param_annotations + + +def _extract_operation_description_from_docstring(api: Api, method_name: str) -> str | None: + """Extract operation description from the actual function docstring.""" + func = app_module._get_protocol_method(api, method_name) + if not func or not func.__doc__: + return None + + doc_lines = func.__doc__.split("\n") + description_lines = [] + metadata_markers = (":param", ":type", ":return", ":returns", ":raises", ":exception", ":yield", ":yields", ":cvar") + + for line in doc_lines: + if line.strip().startswith(metadata_markers): + break + description_lines.append(line) + + description = "\n".join(description_lines).strip() + return description if description else None + + +def _extract_response_description_from_docstring(webmethod, response_model, api: Api, method_name: str) -> str: + """Extract response description from the actual function docstring.""" + func = app_module._get_protocol_method(api, method_name) + if not func or not func.__doc__: + return "Successful Response" + for line in func.__doc__.split("\n"): + if line.strip().startswith(":returns:"): + if desc := line.strip()[9:].strip(): + return desc + return "Successful Response" + + +def _get_tag_from_api(api: Api) -> str: + """Extract a tag name from the API enum for API grouping.""" + return api.value.replace("_", " ").title() + + +def _is_file_or_form_param(param_type: Any) -> bool: + """Check if a parameter type is annotated with File() or Form().""" + if get_origin(param_type) is Annotated: + args = get_args(param_type) + if len(args) > 1: + # Check metadata for File or Form + for metadata in args[1:]: + # Check if it's a File or Form instance + if hasattr(metadata, "__class__"): + class_name = metadata.__class__.__name__ + if class_name in ("File", "Form"): + return True + return False + + +def _is_extra_body_field(metadata_item: Any) -> bool: + """Check if a metadata item is an ExtraBodyField instance.""" + from llama_stack_api.schema_utils import ExtraBodyField + + return isinstance(metadata_item, ExtraBodyField) + + +def _is_async_iterator_type(type_obj: Any) -> bool: + """Check if a type is AsyncIterator or AsyncIterable.""" + 
from collections.abc import AsyncIterable, AsyncIterator + + origin = get_origin(type_obj) + if origin is None: + # Check if it's the class itself + return type_obj in (AsyncIterator, AsyncIterable) or ( + hasattr(type_obj, "__origin__") and type_obj.__origin__ in (AsyncIterator, AsyncIterable) + ) + return origin in (AsyncIterator, AsyncIterable) + + +def _extract_response_models_from_union(union_type: Any) -> tuple[type | None, type | None]: + """ + Extract non-streaming and streaming response models from a union type. + + Returns: + tuple: (non_streaming_model, streaming_model) + """ + non_streaming_model = None + streaming_model = None + + args = get_args(union_type) + for arg in args: + # Check if it's an AsyncIterator + if _is_async_iterator_type(arg): + # Extract the type argument from AsyncIterator[T] + iterator_args = get_args(arg) + if iterator_args: + inner_type = iterator_args[0] + # Check if the inner type is a registered schema (union type) + # or a Pydantic model + if hasattr(inner_type, "model_json_schema"): + streaming_model = inner_type + else: + # Might be a registered schema - check if it's registered + if get_registered_schema_info(inner_type): + # We'll need to look this up later, but for now store the type + streaming_model = inner_type + elif hasattr(arg, "model_json_schema"): + # Non-streaming Pydantic model + if non_streaming_model is None: + non_streaming_model = arg + + return non_streaming_model, streaming_model + + +def _find_models_for_endpoint( + webmethod, api: Api, method_name: str, is_post_put: bool = False +) -> tuple[type | None, type | None, list[tuple[str, type, Any]], list[inspect.Parameter], type | None, str | None]: + """ + Find appropriate request and response models for an endpoint by analyzing the actual function signature. + This uses the protocol function to determine the correct models dynamically. 
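+
+    Rough order of the checks below (illustrative summary): path parameters
+    are skipped, File()/Form() parameters are collected for direct use in the
+    endpoint signature, ExtraBodyField parameters are set aside, and every
+    remaining parameter becomes a body or query parameter.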
+ + Args: + webmethod: The webmethod metadata + api: The API enum for looking up the function + method_name: The method name (function name) + is_post_put: Whether this is a POST, PUT, or PATCH request (GET requests should never have request bodies) + + Returns: + tuple: (request_model, response_model, query_parameters, file_form_params, streaming_response_model, response_schema_name) + where query_parameters is a list of (name, type, default_value) tuples + and file_form_params is a list of inspect.Parameter objects for File()/Form() params + and streaming_response_model is the model for streaming responses (AsyncIterator content) + """ + route_descriptor = f"{webmethod.method or 'UNKNOWN'} {webmethod.route}" + try: + # Get the function from the protocol + func = app_module._get_protocol_method(api, method_name) + if not func: + logger.warning("No protocol method for %s.%s (%s)", api, method_name, route_descriptor) + return None, None, [], [], None, None + + # Analyze the function signature + sig = inspect.signature(func) + + # Find request model and collect all body parameters + request_model = None + query_parameters = [] + file_form_params = [] + path_params = set() + extra_body_params = [] + response_schema_name = None + + # Extract path parameters from the route + if webmethod and hasattr(webmethod, "route"): + path_matches = re.findall(r"\{([^}:]+)(?::[^}]+)?\}", webmethod.route) + path_params = set(path_matches) + + for param_name, param in sig.parameters.items(): + if param_name == "self": + continue + + # Skip *args and **kwargs parameters - these are not real API parameters + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + + # Check if this is a path parameter + if param_name in path_params: + # Path parameters are handled separately, skip them + continue + + # Check if it's a File() or Form() parameter - these need special handling + param_type = param.annotation + if _is_file_or_form_param(param_type): + # File() and Form() parameters must be in the function signature directly + # They cannot be part of a Pydantic model + file_form_params.append(param) + continue + + # Check for ExtraBodyField in Annotated types + is_extra_body = False + extra_body_description = None + if get_origin(param_type) is Annotated: + args = get_args(param_type) + base_type = args[0] if args else param_type + metadata = args[1:] if len(args) > 1 else [] + + # Check if any metadata item is an ExtraBodyField + for metadata_item in metadata: + if _is_extra_body_field(metadata_item): + is_extra_body = True + extra_body_description = metadata_item.description + break + + if is_extra_body: + # Store as extra body parameter - exclude from request model + extra_body_params.append((param_name, base_type, extra_body_description)) + continue + + # Check if it's a Pydantic model (for POST/PUT requests) + if hasattr(param_type, "model_json_schema"): + # Collect all body parameters including Pydantic models + # We'll decide later whether to use a single model or create a combined one + query_parameters.append((param_name, param_type, param.default)) + elif get_origin(param_type) is Annotated: + # Handle Annotated types - get the base type + args = get_args(param_type) + if args and hasattr(args[0], "model_json_schema"): + # Collect Pydantic models from Annotated types + query_parameters.append((param_name, args[0], param.default)) + else: + # Regular annotated parameter (but not File/Form, already handled above) + query_parameters.append((param_name, param_type, 
param.default))
+            else:
+                # This is likely a body parameter for POST/PUT or query parameter for GET
+                # Store the parameter info for later use
+                # Preserve inspect.Parameter.empty to distinguish "no default" from "default=None"
+                default_value = param.default
+
+                # The annotation is stored unchanged here; Annotated metadata and
+                # union types are dealt with later when the request model is built
+                query_parameters.append((param_name, param_type, default_value))
+
+        # Store extra body fields for later use in post-processing
+        # We'll store them when the endpoint is created, as we need the full path
+        # For now, attach to the function for later retrieval
+        if extra_body_params:
+            func._extra_body_params = extra_body_params  # type: ignore
+
+        # If there's exactly one body parameter and it's a Pydantic model, use it directly
+        # Otherwise, we'll create a combined request model from all parameters
+        # BUT: For GET requests, never create a request body - all parameters should be query parameters
+        if is_post_put and len(query_parameters) == 1:
+            param_name, param_type, default_value = query_parameters[0]
+            if hasattr(param_type, "model_json_schema"):
+                request_model = param_type
+                query_parameters = []  # Clear query_parameters so we use the single model
+
+        # Find response model from return annotation
+        # Also detect streaming response models (AsyncIterator)
+        response_model = None
+        streaming_response_model = None
+        return_annotation = sig.return_annotation
+        if return_annotation != inspect.Signature.empty:
+            origin = get_origin(return_annotation)
+            if hasattr(return_annotation, "model_json_schema"):
+                response_model = return_annotation
+            elif origin is Annotated:
+                # Handle Annotated return types
+                args = get_args(return_annotation)
+                if args:
+                    # Check if the first argument is a Pydantic model
+                    if hasattr(args[0], "model_json_schema"):
+                        response_model = args[0]
+                    else:
+                        # Check if the first argument is a union type
+                        inner_origin = get_origin(args[0])
+                        if inner_origin is not None and (
+                            inner_origin is types.UnionType or inner_origin is typing.Union
+                        ):
+                            response_model, streaming_response_model = _extract_response_models_from_union(args[0])
+            elif origin is not None and (origin is types.UnionType or origin is typing.Union):
+                # Handle union types - extract both non-streaming and streaming models
+                response_model, streaming_response_model = _extract_response_models_from_union(return_annotation)
+            else:
+                try:
+                    from fastapi import Response as FastAPIResponse
+                except ImportError:
+                    fastapi_response_cls = None
+                else:
+                    fastapi_response_cls = FastAPIResponse
+                try:
+                    from starlette.responses import Response as StarletteResponse
+                except ImportError:
+                    starlette_response_cls = None
+                else:
+                    starlette_response_cls = StarletteResponse
+
+                response_types = tuple(t for t in (fastapi_response_cls, starlette_response_cls) if t is not None)
+                if response_types and any(return_annotation is t for t in response_types):
+                    response_schema_name = "Response"
+
+        return (
+            request_model,
+            response_model,
+            query_parameters,
+            file_form_params,
+            streaming_response_model,
+            response_schema_name,
+        )
+
+    except Exception as exc:
+        logger.warning(
+            "Failed to analyze endpoint %s.%s (%s): %s", api, method_name, route_descriptor, exc, exc_info=True
+        )
+        return None, None, [], [], None, None
+
+
+def _create_fastapi_endpoint(app: FastAPI, route, webmethod, api: Api):
+    """Create a FastAPI endpoint from a discovered route and webmethod."""
+    path = route.path
+    raw_methods = route.methods or set()
+    method_list = sorted({method.upper() for method in raw_methods if method and method.upper() != "HEAD"})
+    if not method_list:
+        method_list = ["GET"]
+    primary_method = method_list[0]
+    name = route.name
+    fastapi_path = path  # route paths already use FastAPI's {param} syntax, so no rewriting is needed
+    is_post_put = any(method in ["POST", "PUT", "PATCH"] for method in method_list)
+
+    (
+        request_model,
+        response_model,
+        query_parameters,
+        file_form_params,
+        streaming_response_model,
+        response_schema_name,
+    ) = _find_models_for_endpoint(webmethod, api, name, is_post_put)
+    operation_description = _extract_operation_description_from_docstring(api, name)
+    response_description = _extract_response_description_from_docstring(webmethod, response_model, api, name)
+
+    # Retrieve and store extra body fields for this endpoint
+    func = app_module._get_protocol_method(api, name)
+    extra_body_params = getattr(func, "_extra_body_params", []) if func else []
+    if extra_body_params:
+        for method in method_list:
+            key = (fastapi_path, method.upper())
+            _extra_body_fields[key] = extra_body_params
+
+    if is_post_put and not request_model and not file_form_params and query_parameters:
+        request_model = _create_dynamic_request_model(
+            api, webmethod, name, primary_method, query_parameters, use_any=False
+        )
+        if not request_model:
+            request_model = _create_dynamic_request_model(
+                api, webmethod, name, primary_method, query_parameters, use_any=True, variant_suffix="Loose"
+            )
+        if request_model:
+            query_parameters = []
+
+    if file_form_params and is_post_put:
+        signature_params = list(file_form_params)
+        param_annotations = {param.name: param.annotation for param in file_form_params}
+        for param_name, param_type, default_value in query_parameters:
+            signature_params.append(
+                inspect.Parameter(
+                    param_name,
+                    inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                    default=default_value if default_value is not inspect.Parameter.empty else inspect.Parameter.empty,
+                    annotation=param_type,
+                )
+            )
+            param_annotations[param_name] = param_type
+
+        async def file_form_endpoint():
+            return response_model() if response_model else {}
+
+        if operation_description:
+            file_form_endpoint.__doc__ = operation_description
+        file_form_endpoint.__signature__ = inspect.Signature(signature_params)
+        file_form_endpoint.__annotations__ = param_annotations
+        endpoint_func = file_form_endpoint
+    elif request_model and response_model:
+        endpoint_func = _create_endpoint_with_request_model(request_model, response_model, operation_description)
+    elif request_model:
+        endpoint_func = _create_endpoint_with_request_model(request_model, None, operation_description)
+    elif response_model and query_parameters:
+        if is_post_put:
+            request_model = _create_dynamic_request_model(
+                api, webmethod, name, primary_method, query_parameters, use_any=False
+            )
+            if not request_model:
+                request_model = _create_dynamic_request_model(
+                    api, webmethod, name, primary_method, query_parameters, use_any=True, variant_suffix="Loose"
+                )
+
+        if request_model:
+            endpoint_func = _create_endpoint_with_request_model(
+                request_model, response_model, operation_description
+            )
+        else:

+            async def empty_endpoint() -> response_model:
+                return response_model() if response_model else {}
+
+            if operation_description:
+                empty_endpoint.__doc__ = operation_description
+            endpoint_func = empty_endpoint
+    else:
+        sorted_params = sorted(query_parameters, key=lambda x: (x[2] is not inspect.Parameter.empty, x[0]))
+        signature_params, param_annotations = _build_signature_params(sorted_params)
+
+        async def 
query_endpoint(): + return response_model() + + if operation_description: + query_endpoint.__doc__ = operation_description + query_endpoint.__signature__ = inspect.Signature(signature_params) + query_endpoint.__annotations__ = param_annotations + endpoint_func = query_endpoint + elif response_model: + + async def response_only_endpoint() -> response_model: + return response_model() + + if operation_description: + response_only_endpoint.__doc__ = operation_description + endpoint_func = response_only_endpoint + elif query_parameters: + signature_params, param_annotations = _build_signature_params(query_parameters) + + async def params_only_endpoint(): + return {} + + if operation_description: + params_only_endpoint.__doc__ = operation_description + params_only_endpoint.__signature__ = inspect.Signature(signature_params) + params_only_endpoint.__annotations__ = param_annotations + endpoint_func = params_only_endpoint + else: + # Endpoint with no parameters and no response model + # If we have a response_model from the function signature, use it even if _find_models_for_endpoint didn't find it + # This can happen if there was an exception during model finding + if response_model is None: + # Try to get response model directly from the function signature as a fallback + func = app_module._get_protocol_method(api, name) + if func: + try: + sig = inspect.signature(func) + return_annotation = sig.return_annotation + if return_annotation != inspect.Signature.empty: + if hasattr(return_annotation, "model_json_schema"): + response_model = return_annotation + elif get_origin(return_annotation) is Annotated: + args = get_args(return_annotation) + if args and hasattr(args[0], "model_json_schema"): + response_model = args[0] + except Exception: + pass + + if response_model: + + async def no_params_endpoint() -> response_model: + return response_model() if response_model else {} + else: + + async def no_params_endpoint(): + return {} + + if operation_description: + no_params_endpoint.__doc__ = operation_description + endpoint_func = no_params_endpoint + + # Build response content with both application/json and text/event-stream if streaming + response_content: dict[str, Any] = {} + if response_model: + response_content["application/json"] = {"schema": {"$ref": f"#/components/schemas/{response_model.__name__}"}} + elif response_schema_name: + response_content["application/json"] = {"schema": {"$ref": f"#/components/schemas/{response_schema_name}"}} + if streaming_response_model: + # Get the schema name for the streaming model + # It might be a registered schema or a Pydantic model + streaming_schema_name = None + # Check if it's a registered schema first (before checking __name__) + # because registered schemas might be Annotated types + if schema_info := get_registered_schema_info(streaming_response_model): + streaming_schema_name = schema_info.name + elif hasattr(streaming_response_model, "__name__"): + streaming_schema_name = streaming_response_model.__name__ + + if streaming_schema_name: + response_content["text/event-stream"] = { + "schema": {"$ref": f"#/components/schemas/{streaming_schema_name}"} + } + + # If no content types, use empty schema + # Add the endpoint to the FastAPI app + is_deprecated = webmethod.deprecated or False + route_kwargs = { + "name": name, + "tags": [_get_tag_from_api(api)], + "deprecated": is_deprecated, + "responses": { + 400: {"$ref": "#/components/responses/BadRequest400"}, + 429: {"$ref": "#/components/responses/TooManyRequests429"}, + 500: {"$ref": 
"#/components/responses/InternalServerError500"}, + "default": {"$ref": "#/components/responses/DefaultError"}, + }, + } + success_response: dict[str, Any] = {"description": response_description} + if response_content: + success_response["content"] = response_content + route_kwargs["responses"][200] = success_response + + # FastAPI needs response_model parameter to properly generate OpenAPI spec + # Use the non-streaming response model if available + if response_model: + route_kwargs["response_model"] = response_model + + method_map = {"GET": app.get, "POST": app.post, "PUT": app.put, "DELETE": app.delete, "PATCH": app.patch} + for method in method_list: + if handler := method_map.get(method): + handler(fastapi_path, **route_kwargs)(endpoint_func) diff --git a/scripts/openapi_generator/main.py b/scripts/openapi_generator/main.py new file mode 100755 index 000000000..e881ff726 --- /dev/null +++ b/scripts/openapi_generator/main.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Main entry point for the FastAPI OpenAPI generator. +""" + +import copy +from pathlib import Path +from typing import Any + +import yaml +from fastapi.openapi.utils import get_openapi + +from . import app, schema_collection, schema_filtering, schema_transforms, state + + +def generate_openapi_spec(output_dir: str) -> dict[str, Any]: + """ + Generate OpenAPI specification using FastAPI's built-in method. + + Args: + output_dir: Directory to save the generated files + + Returns: + The generated OpenAPI specification as a dictionary + """ + state.reset_generator_state() + # Create the FastAPI app + fastapi_app = app.create_llama_stack_app() + + # Generate the OpenAPI schema + openapi_schema = get_openapi( + title=fastapi_app.title, + version=fastapi_app.version, + description=fastapi_app.description, + routes=fastapi_app.routes, + servers=fastapi_app.servers, + ) + + # Set OpenAPI version to 3.1.0 + openapi_schema["openapi"] = "3.1.0" + + # Add standard error responses + openapi_schema = schema_transforms._add_error_responses(openapi_schema) + + # Ensure all @json_schema_type decorated models are included + openapi_schema = schema_collection._ensure_json_schema_types_included(openapi_schema) + + # Fix $ref references to point to components/schemas instead of $defs + openapi_schema = schema_transforms._fix_ref_references(openapi_schema) + + # Fix path parameter resolution issues + openapi_schema = schema_transforms._fix_path_parameters(openapi_schema) + + # Eliminate $defs section entirely for oasdiff compatibility + openapi_schema = schema_transforms._eliminate_defs_section(openapi_schema) + + # Clean descriptions in schema definitions by removing docstring metadata + openapi_schema = schema_transforms._clean_schema_descriptions(openapi_schema) + openapi_schema = schema_transforms._normalize_empty_responses(openapi_schema) + + # Remove query parameters from POST/PUT/PATCH endpoints that have a request body + # FastAPI sometimes infers parameters as query params even when they should be in the request body + openapi_schema = schema_transforms._remove_query_params_from_body_endpoints(openapi_schema) + + # Add x-llama-stack-extra-body-params extension for ExtraBodyField parameters + openapi_schema = schema_transforms._add_extra_body_params_extension(openapi_schema) + + # Remove request bodies from GET endpoints (GET 
requests should never have request bodies) + # This must run AFTER _add_extra_body_params_extension to ensure any request bodies + # that FastAPI incorrectly added to GET endpoints are removed + openapi_schema = schema_transforms._remove_request_bodies_from_get_endpoints(openapi_schema) + + # Extract duplicate union types to shared schema references + openapi_schema = schema_transforms._extract_duplicate_union_types(openapi_schema) + + # Split into stable (v1 only), experimental (v1alpha + v1beta), deprecated, and combined (stainless) specs + # Each spec needs its own deep copy of the full schema to avoid cross-contamination + stable_schema = schema_filtering._filter_schema_by_version( + copy.deepcopy(openapi_schema), stable_only=True, exclude_deprecated=True + ) + experimental_schema = schema_filtering._filter_schema_by_version( + copy.deepcopy(openapi_schema), stable_only=False, exclude_deprecated=True + ) + deprecated_schema = schema_filtering._filter_deprecated_schema(copy.deepcopy(openapi_schema)) + combined_schema = schema_filtering._filter_combined_schema(copy.deepcopy(openapi_schema)) + + # Apply duplicate union extraction to combined schema (used by Stainless) + combined_schema = schema_transforms._extract_duplicate_union_types(combined_schema) + + base_description = ( + "This is the specification of the Llama Stack that provides\n" + " a set of endpoints and their corresponding interfaces that are\n" + " tailored to\n" + " best leverage Llama Models." + ) + + schema_configs = [ + ( + stable_schema, + "Llama Stack Specification", + "**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.", + ), + ( + experimental_schema, + "Llama Stack Specification - Experimental APIs", + "**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before\n becoming stable.", + ), + ( + deprecated_schema, + "Llama Stack Specification - Deprecated APIs", + "**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for\n migration reference only.", + ), + ( + combined_schema, + "Llama Stack Specification - Stable & Experimental APIs", + "**🔗 COMBINED**: This specification includes both stable production-ready APIs\n and experimental pre-release APIs. 
Use stable APIs for production deployments\n and experimental APIs for testing new features.", + ), + ] + + for schema, title, description_suffix in schema_configs: + if "info" not in schema: + schema["info"] = {} + schema["info"].update( + { + "title": title, + "version": "v1", + "description": f"{base_description}\n\n {description_suffix}", + } + ) + + schemas_to_validate = [ + (stable_schema, "Stable schema"), + (experimental_schema, "Experimental schema"), + (deprecated_schema, "Deprecated schema"), + (combined_schema, "Combined (stainless) schema"), + ] + + for schema, _ in schemas_to_validate: + schema_transforms._fix_schema_issues(schema) + schema_transforms._apply_legacy_sorting(schema) + + print("\nValidating generated schemas...") + failed_schemas = [ + name for schema, name in schemas_to_validate if not schema_transforms.validate_openapi_schema(schema, name) + ] + if failed_schemas: + raise ValueError(f"Invalid schemas: {', '.join(failed_schemas)}") + + # Ensure output directory exists + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + # Save the stable specification + yaml_path = output_path / "llama-stack-spec.yaml" + schema_transforms._write_yaml_file(yaml_path, stable_schema) + # Post-process the YAML file to remove $defs section and fix references + with open(yaml_path) as f: + yaml_content = f.read() + + if " $defs:" in yaml_content or "#/$defs/" in yaml_content: + # Use string replacement to fix references directly + if "#/$defs/" in yaml_content: + yaml_content = yaml_content.replace("#/$defs/", "#/components/schemas/") + + # Parse the YAML content + yaml_data = yaml.safe_load(yaml_content) + + # Move $defs to components/schemas if it exists + if "$defs" in yaml_data: + if "components" not in yaml_data: + yaml_data["components"] = {} + if "schemas" not in yaml_data["components"]: + yaml_data["components"]["schemas"] = {} + + # Move all $defs to components/schemas + for def_name, def_schema in yaml_data["$defs"].items(): + yaml_data["components"]["schemas"][def_name] = def_schema + + # Remove the $defs section + del yaml_data["$defs"] + + # Write the modified YAML back + schema_transforms._write_yaml_file(yaml_path, yaml_data) + + print(f"Generated YAML (stable): {yaml_path}") + + experimental_yaml_path = output_path / "experimental-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(experimental_yaml_path, experimental_schema) + print(f"Generated YAML (experimental): {experimental_yaml_path}") + + deprecated_yaml_path = output_path / "deprecated-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(deprecated_yaml_path, deprecated_schema) + print(f"Generated YAML (deprecated): {deprecated_yaml_path}") + + # Generate combined (stainless) spec + stainless_yaml_path = output_path / "stainless-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(stainless_yaml_path, combined_schema) + print(f"Generated YAML (stainless/combined): {stainless_yaml_path}") + + return stable_schema + + +def main(): + """Main entry point for the FastAPI OpenAPI generator.""" + import argparse + + parser = argparse.ArgumentParser(description="Generate OpenAPI specification using FastAPI") + parser.add_argument("output_dir", help="Output directory for generated files") + + args = parser.parse_args() + + print("Generating OpenAPI specification using FastAPI...") + print(f"Output directory: {args.output_dir}") + + try: + openapi_schema = generate_openapi_spec(output_dir=args.output_dir) + + print("\nOpenAPI specification generated successfully!") 
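+        # Typical invocation (assumed; any writable output directory works):
+        #   python -m scripts.openapi_generator.main docs/static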
+ print(f"Schemas: {len(openapi_schema.get('components', {}).get('schemas', {}))}") + print(f"Paths: {len(openapi_schema.get('paths', {}))}") + operation_count = sum( + 1 + for path_info in openapi_schema.get("paths", {}).values() + for method in ["get", "post", "put", "delete", "patch"] + if method in path_info + ) + print(f"Operations: {operation_count}") + + except Exception as e: + print(f"Error generating OpenAPI specification: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/openapi_generator/schema_collection.py b/scripts/openapi_generator/schema_collection.py new file mode 100644 index 000000000..51a70c62a --- /dev/null +++ b/scripts/openapi_generator/schema_collection.py @@ -0,0 +1,131 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Schema discovery and collection for OpenAPI generation. +""" + +import importlib +from typing import Any + + +def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None: + """Ensure components.schemas exists in the schema.""" + if "components" not in openapi_schema: + openapi_schema["components"] = {} + if "schemas" not in openapi_schema["components"]: + openapi_schema["components"]["schemas"] = {} + + +def _load_extra_schema_modules() -> None: + """ + Import modules outside llama_stack_api that use schema_utils to register schemas. + + The API package already imports its submodules via __init__, but server-side modules + like telemetry need to be imported explicitly so their decorator side effects run. + """ + extra_modules = [ + "llama_stack.core.telemetry.telemetry", + ] + for module_name in extra_modules: + try: + importlib.import_module(module_name) + except ImportError: + continue + + +def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None: + """ + Extract $defs from a schema, move them to components/schemas, and fix references. + This handles both TypeAdapter-generated schemas and model_json_schema() schemas. + """ + if "$defs" in schema: + defs = schema.pop("$defs") + for def_name, def_schema in defs.items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + # Recursively handle $defs in nested schemas + _extract_and_fix_defs(def_schema, openapi_schema) + + # Fix any references in the main schema that point to $defs + def fix_refs_in_schema(obj: Any) -> None: + if isinstance(obj, dict): + if "$ref" in obj and obj["$ref"].startswith("#/$defs/"): + obj["$ref"] = obj["$ref"].replace("#/$defs/", "#/components/schemas/") + for value in obj.values(): + fix_refs_in_schema(value) + elif isinstance(obj, list): + for item in obj: + fix_refs_in_schema(item) + + fix_refs_in_schema(schema) + + +def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Ensure all registered schemas (decorated, explicit, and dynamic) are included in the OpenAPI schema. + Relies on llama_stack_api's registry instead of recursively importing every module. 
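+
+    Processing order (as implemented below): explicitly registered schemas
+    first, then @json_schema_type models, then dynamically created request
+    models; existing entries in components/schemas are never overwritten.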
+ """ + _ensure_components_schemas(openapi_schema) + + from pydantic import TypeAdapter + + from llama_stack_api.schema_utils import ( + iter_dynamic_schema_types, + iter_json_schema_types, + iter_registered_schema_types, + ) + + # Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api + _load_extra_schema_modules() + + # Handle explicitly registered schemas first (union types, Annotated structs, etc.) + for registration_info in iter_registered_schema_types(): + schema_type = registration_info.type + schema_name = registration_info.name + if schema_name not in openapi_schema["components"]["schemas"]: + try: + adapter = TypeAdapter(schema_type) + schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception as e: + print(f"Warning: Failed to generate schema for registered type {schema_name}: {e}") + import traceback + + traceback.print_exc() + continue + + # Add @json_schema_type decorated models + for model in iter_json_schema_types(): + schema_name = getattr(model, "_llama_stack_schema_name", None) or getattr(model, "__name__", None) + if not schema_name: + continue + if schema_name not in openapi_schema["components"]["schemas"]: + try: + if hasattr(model, "model_json_schema"): + schema = model.model_json_schema(ref_template="#/components/schemas/{model}") + else: + adapter = TypeAdapter(model) + schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception as e: + print(f"Warning: Failed to generate schema for {schema_name}: {e}") + continue + + # Include any dynamic models generated while building endpoints + for model in iter_dynamic_schema_types(): + try: + schema_name = model.__name__ + if schema_name not in openapi_schema["components"]["schemas"]: + schema = model.model_json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception: + continue + + return openapi_schema diff --git a/scripts/openapi_generator/schema_filtering.py b/scripts/openapi_generator/schema_filtering.py new file mode 100644 index 000000000..4667d27a5 --- /dev/null +++ b/scripts/openapi_generator/schema_filtering.py @@ -0,0 +1,297 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Schema filtering and version filtering for OpenAPI generation. 
+""" + +from typing import Any + +from llama_stack_api.schema_utils import iter_json_schema_types, iter_registered_schema_types +from llama_stack_api.version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + + +def _get_all_json_schema_type_names() -> set[str]: + """Collect schema names from @json_schema_type-decorated models.""" + schema_names = set() + for model in iter_json_schema_types(): + schema_name = getattr(model, "_llama_stack_schema_name", None) or getattr(model, "__name__", None) + if schema_name: + schema_names.add(schema_name) + return schema_names + + +def _get_explicit_schema_names(openapi_schema: dict[str, Any]) -> set[str]: + """Schema names to keep even if not referenced by a path.""" + registered_schema_names = {info.name for info in iter_registered_schema_types()} + json_schema_type_names = _get_all_json_schema_type_names() + return registered_schema_names | json_schema_type_names + + +def _find_schema_refs_in_object(obj: Any) -> set[str]: + """ + Recursively find all schema references ($ref) in an object. + """ + refs = set() + + if isinstance(obj, dict): + for key, value in obj.items(): + if key == "$ref" and isinstance(value, str) and value.startswith("#/components/schemas/"): + schema_name = value.split("/")[-1] + refs.add(schema_name) + else: + refs.update(_find_schema_refs_in_object(value)) + elif isinstance(obj, list): + for item in obj: + refs.update(_find_schema_refs_in_object(item)) + + return refs + + +def _add_transitive_references( + referenced_schemas: set[str], all_schemas: dict[str, Any], initial_schemas: set[str] | None = None +) -> set[str]: + """Add transitive references for given schemas.""" + if initial_schemas: + referenced_schemas.update(initial_schemas) + additional_schemas = set() + for schema_name in initial_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + else: + additional_schemas = set() + for schema_name in referenced_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + while additional_schemas: + new_schemas = additional_schemas - referenced_schemas + if not new_schemas: + break + referenced_schemas.update(new_schemas) + additional_schemas = set() + for schema_name in new_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + return referenced_schemas + + +def _find_schemas_referenced_by_paths(filtered_paths: dict[str, Any], openapi_schema: dict[str, Any]) -> set[str]: + """ + Find all schemas that are referenced by the filtered paths. + This recursively traverses the path definitions to find all $ref references. 
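+
+    For example (illustrative), an operation response containing
+    {"$ref": "#/components/schemas/Model"} marks "Model" as used, along with
+    any schema that "Model" itself references.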
+ """ + referenced_schemas = set() + + # Traverse all filtered paths + for _, path_item in filtered_paths.items(): + if not isinstance(path_item, dict): + continue + + # Check each HTTP method in the path + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method in path_item: + operation = path_item[method] + if isinstance(operation, dict): + # Find all schema references in this operation + referenced_schemas.update(_find_schema_refs_in_object(operation)) + + # Also check the responses section for schema references + if "components" in openapi_schema and "responses" in openapi_schema["components"]: + referenced_schemas.update(_find_schema_refs_in_object(openapi_schema["components"]["responses"])) + + # Also include schemas that are referenced by other schemas (transitive references) + # This ensures we include all dependencies + all_schemas = openapi_schema.get("components", {}).get("schemas", {}) + additional_schemas = set() + + for schema_name in referenced_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + # Keep adding transitive references until no new ones are found + while additional_schemas: + new_schemas = additional_schemas - referenced_schemas + if not new_schemas: + break + referenced_schemas.update(new_schemas) + additional_schemas = set() + for schema_name in new_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + return referenced_schemas + + +def _filter_schemas_by_references( + filtered_schema: dict[str, Any], filtered_paths: dict[str, Any], openapi_schema: dict[str, Any] +) -> dict[str, Any]: + """Filter schemas to only include ones referenced by filtered paths and explicit schemas.""" + if "components" not in filtered_schema or "schemas" not in filtered_schema["components"]: + return filtered_schema + + referenced_schemas = _find_schemas_referenced_by_paths(filtered_paths, openapi_schema) + all_schemas = openapi_schema.get("components", {}).get("schemas", {}) + explicit_names = _get_explicit_schema_names(openapi_schema) + referenced_schemas = _add_transitive_references(referenced_schemas, all_schemas, explicit_names) + + filtered_schemas = { + name: schema for name, schema in filtered_schema["components"]["schemas"].items() if name in referenced_schemas + } + filtered_schema["components"]["schemas"] = filtered_schemas + + if "components" in openapi_schema and "$defs" in openapi_schema["components"]: + if "components" not in filtered_schema: + filtered_schema["components"] = {} + filtered_schema["components"]["$defs"] = openapi_schema["components"]["$defs"] + + return filtered_schema + + +def _path_starts_with_version(path: str, version: str) -> bool: + """Check if a path starts with a specific API version prefix.""" + return path.startswith(f"/{version}/") + + +def _is_stable_path(path: str) -> bool: + """Check if a path is a stable v1 path (not v1alpha or v1beta).""" + return ( + _path_starts_with_version(path, LLAMA_STACK_API_V1) + and not _path_starts_with_version(path, LLAMA_STACK_API_V1ALPHA) + and not _path_starts_with_version(path, LLAMA_STACK_API_V1BETA) + ) + + +def _is_experimental_path(path: str) -> bool: + """Check if a path is an experimental path (v1alpha or v1beta).""" + return _path_starts_with_version(path, LLAMA_STACK_API_V1ALPHA) or _path_starts_with_version( + path, LLAMA_STACK_API_V1BETA + ) + + +def _is_path_deprecated(path_item: dict[str, Any]) -> bool: + """Check 
if a path item has any deprecated operations.""" + if not isinstance(path_item, dict): + return False + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if isinstance(path_item.get(method), dict) and path_item[method].get("deprecated", False): + return True + return False + + +def _filter_schema_by_version( + openapi_schema: dict[str, Any], stable_only: bool = True, exclude_deprecated: bool = True +) -> dict[str, Any]: + """ + Filter OpenAPI schema by API version. + + Args: + openapi_schema: The full OpenAPI schema + stable_only: If True, return only /v1/ paths (stable). If False, return only /v1alpha/ and /v1beta/ paths (experimental). + exclude_deprecated: If True, exclude deprecated endpoints from the result. + + Returns: + Filtered OpenAPI schema + """ + filtered_schema = openapi_schema.copy() + + if "paths" not in filtered_schema: + return filtered_schema + + filtered_paths = {} + for path, path_item in filtered_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + # Filter at operation level, not path level + # This allows paths with both deprecated and non-deprecated operations + filtered_path_item = {} + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method not in path_item: + continue + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Skip deprecated operations if exclude_deprecated is True + if exclude_deprecated and operation.get("deprecated", False): + continue + + filtered_path_item[method] = operation + + # Only include path if it has at least one operation after filtering + if filtered_path_item: + # Check if path matches version filter + if (stable_only and _is_stable_path(path)) or (not stable_only and _is_experimental_path(path)): + filtered_paths[path] = filtered_path_item + + filtered_schema["paths"] = filtered_paths + return _filter_schemas_by_references(filtered_schema, filtered_paths, openapi_schema) + + +def _filter_deprecated_schema(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Filter OpenAPI schema to include only deprecated endpoints. + Includes all deprecated endpoints regardless of version (v1, v1alpha, v1beta). + """ + filtered_schema = openapi_schema.copy() + + if "paths" not in filtered_schema: + return filtered_schema + + # Filter paths to only include deprecated ones + filtered_paths = {} + for path, path_item in filtered_schema["paths"].items(): + if _is_path_deprecated(path_item): + filtered_paths[path] = path_item + + filtered_schema["paths"] = filtered_paths + + return filtered_schema + + +def _filter_combined_schema(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Filter OpenAPI schema to include both stable (v1) and experimental (v1alpha, v1beta) APIs. + Includes deprecated endpoints. This is used for the combined "stainless" spec. 
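+
+    Unlike _filter_schema_by_version, no deprecated-operation check is applied
+    here, so deprecated operations on matching paths are retained.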
+    """
+    filtered_schema = openapi_schema.copy()
+
+    if "paths" not in filtered_schema:
+        return filtered_schema
+
+    # Filter paths to include stable (v1) and experimental (v1alpha, v1beta), including deprecated operations
+    filtered_paths = {}
+    for path, path_item in filtered_schema["paths"].items():
+        if not isinstance(path_item, dict):
+            continue
+
+        # Filter at operation level, not path level
+        # This allows paths with both deprecated and non-deprecated operations
+        filtered_path_item = {}
+        for method in ["get", "post", "put", "delete", "patch", "head", "options"]:
+            if method not in path_item:
+                continue
+            operation = path_item[method]
+            if not isinstance(operation, dict):
+                continue
+
+            filtered_path_item[method] = operation
+
+        # Only include path if it has at least one operation after filtering
+        if filtered_path_item:
+            # Check if path matches version filter (stable or experimental)
+            if _is_stable_path(path) or _is_experimental_path(path):
+                filtered_paths[path] = filtered_path_item
+
+    filtered_schema["paths"] = filtered_paths
+
+    return _filter_schemas_by_references(filtered_schema, filtered_paths, openapi_schema)
diff --git a/scripts/openapi_generator/schema_transforms.py b/scripts/openapi_generator/schema_transforms.py
new file mode 100644
index 000000000..5821c99d5
--- /dev/null
+++ b/scripts/openapi_generator/schema_transforms.py
@@ -0,0 +1,963 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Schema transformations and fixes for OpenAPI generation.
+"""
+
+import copy
+from collections import OrderedDict
+from pathlib import Path
+from typing import Any
+
+import yaml
+from openapi_spec_validator import validate_spec
+from openapi_spec_validator.exceptions import OpenAPISpecValidatorError
+
+from . import endpoints, schema_collection
+from ._legacy_order import (
+    LEGACY_OPERATION_KEYS,
+    LEGACY_PATH_ORDER,
+    LEGACY_RESPONSE_ORDER,
+    LEGACY_SCHEMA_ORDER,
+    LEGACY_SECURITY,
+    LEGACY_TAG_GROUPS,
+    LEGACY_TAGS,
+)
+from .state import _extra_body_fields
+
+
+def _fix_ref_references(openapi_schema: dict[str, Any]) -> dict[str, Any]:
+    """
+    Fix $ref references to point to components/schemas instead of $defs.
+    This prevents the YAML dumper from creating a root-level $defs section.
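+
+    For example (illustrative), {"$ref": "#/$defs/SamplingParams"} is
+    rewritten in place to {"$ref": "#/components/schemas/SamplingParams"}.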
+ """ + + def fix_refs(obj: Any) -> None: + if isinstance(obj, dict): + if "$ref" in obj and obj["$ref"].startswith("#/$defs/"): + # Replace #/$defs/ with #/components/schemas/ + obj["$ref"] = obj["$ref"].replace("#/$defs/", "#/components/schemas/") + for value in obj.values(): + fix_refs(value) + elif isinstance(obj, list): + for item in obj: + fix_refs(item) + + fix_refs(openapi_schema) + return openapi_schema + + +def _normalize_empty_responses(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """Convert empty 200 responses into 204 No Content.""" + + for path_item in openapi_schema.get("paths", {}).values(): + if not isinstance(path_item, dict): + continue + for method in list(path_item.keys()): + operation = path_item.get(method) + if not isinstance(operation, dict): + continue + responses = operation.get("responses") + if not isinstance(responses, dict): + continue + response_200 = responses.get("200") or responses.get(200) + if response_200 is None: + continue + content = response_200.get("content") + if content and any( + isinstance(media, dict) and media.get("schema") not in ({}, None) for media in content.values() + ): + continue + responses.pop("200", None) + responses.pop(200, None) + responses["204"] = {"description": response_200.get("description", "No Content")} + return openapi_schema + + +def _eliminate_defs_section(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Eliminate $defs section entirely by moving all definitions to components/schemas. + This matches the structure of the old pyopenapi generator for oasdiff compatibility. + """ + schema_collection._ensure_components_schemas(openapi_schema) + + # First pass: collect all $defs from anywhere in the schema + defs_to_move = {} + + def collect_defs(obj: Any) -> None: + if isinstance(obj, dict): + if "$defs" in obj: + # Collect $defs for later processing + for def_name, def_schema in obj["$defs"].items(): + if def_name not in defs_to_move: + defs_to_move[def_name] = def_schema + + # Recursively process all values + for value in obj.values(): + collect_defs(value) + elif isinstance(obj, list): + for item in obj: + collect_defs(item) + + # Collect all $defs + collect_defs(openapi_schema) + + # Move all $defs to components/schemas + for def_name, def_schema in defs_to_move.items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + + # Also move any existing root-level $defs to components/schemas + if "$defs" in openapi_schema: + print(f"Found root-level $defs with {len(openapi_schema['$defs'])} items, moving to components/schemas") + for def_name, def_schema in openapi_schema["$defs"].items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + # Remove the root-level $defs + del openapi_schema["$defs"] + + # Second pass: remove all $defs sections from anywhere in the schema + def remove_defs(obj: Any) -> None: + if isinstance(obj, dict): + if "$defs" in obj: + del obj["$defs"] + + # Recursively process all values + for value in obj.values(): + remove_defs(value) + elif isinstance(obj, list): + for item in obj: + remove_defs(item) + + # Remove all $defs sections + remove_defs(openapi_schema) + + return openapi_schema + + +def _add_error_responses(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Add standard error response definitions to the OpenAPI schema. + Uses the actual Error model from the codebase for consistency. 
+ """ + if "components" not in openapi_schema: + openapi_schema["components"] = {} + if "responses" not in openapi_schema["components"]: + openapi_schema["components"]["responses"] = {} + + try: + from llama_stack_api.datatypes import Error + + schema_collection._ensure_components_schemas(openapi_schema) + if "Error" not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"]["Error"] = Error.model_json_schema() + except ImportError: + pass + + schema_collection._ensure_components_schemas(openapi_schema) + if "Response" not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"]["Response"] = {"title": "Response", "type": "object"} + + # Define standard HTTP error responses + error_responses = { + 400: { + "name": "BadRequest400", + "description": "The request was invalid or malformed", + "example": {"status": 400, "title": "Bad Request", "detail": "The request was invalid or malformed"}, + }, + 429: { + "name": "TooManyRequests429", + "description": "The client has sent too many requests in a given amount of time", + "example": { + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later.", + }, + }, + 500: { + "name": "InternalServerError500", + "description": "The server encountered an unexpected error", + "example": {"status": 500, "title": "Internal Server Error", "detail": "An unexpected error occurred"}, + }, + } + + # Add each error response to the schema + for _, error_info in error_responses.items(): + response_name = error_info["name"] + openapi_schema["components"]["responses"][response_name] = { + "description": error_info["description"], + "content": { + "application/json": {"schema": {"$ref": "#/components/schemas/Error"}, "example": error_info["example"]} + }, + } + + # Add a default error response + openapi_schema["components"]["responses"]["DefaultError"] = { + "description": "An error occurred", + "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Error"}}}, + } + + return openapi_schema + + +def _fix_path_parameters(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Fix path parameter resolution issues by adding explicit parameter definitions. 
+ """ + if "paths" not in openapi_schema: + return openapi_schema + + for path, path_item in openapi_schema["paths"].items(): + # Extract path parameters from the URL + path_params = endpoints._extract_path_parameters(path) + + if not path_params: + continue + + # Add parameters to each operation in this path + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method in path_item and isinstance(path_item[method], dict): + operation = path_item[method] + if "parameters" not in operation: + operation["parameters"] = [] + + # Add path parameters that aren't already defined + existing_param_names = {p.get("name") for p in operation["parameters"] if p.get("in") == "path"} + for param in path_params: + if param["name"] not in existing_param_names: + operation["parameters"].append(param) + + return openapi_schema + + +def _get_schema_title(item: dict[str, Any]) -> str | None: + """Extract a title for a schema item to use in union variant names.""" + if "$ref" in item: + return item["$ref"].split("/")[-1] + elif "type" in item: + type_val = item["type"] + if type_val == "null": + return None + if type_val == "array" and "items" in item: + items = item["items"] + if isinstance(items, dict): + if "anyOf" in items or "oneOf" in items: + nested_union = items.get("anyOf") or items.get("oneOf") + if isinstance(nested_union, list) and len(nested_union) > 0: + nested_types = [] + for nested_item in nested_union: + if isinstance(nested_item, dict): + if "$ref" in nested_item: + nested_types.append(nested_item["$ref"].split("/")[-1]) + elif "oneOf" in nested_item: + one_of_items = nested_item.get("oneOf", []) + if one_of_items and isinstance(one_of_items[0], dict) and "$ref" in one_of_items[0]: + base_name = one_of_items[0]["$ref"].split("/")[-1].split("-")[0] + nested_types.append(f"{base_name}Union") + else: + nested_types.append("Union") + elif "type" in nested_item and nested_item["type"] != "null": + nested_types.append(nested_item["type"]) + if nested_types: + unique_nested = list(dict.fromkeys(nested_types)) + # Use more descriptive names for better code generation + if len(unique_nested) <= 3: + return f"list[{' | '.join(unique_nested)}]" + else: + # Include first few types for better naming + return f"list[{unique_nested[0]} | {unique_nested[1]} | ...]" + return "list[Union]" + elif "$ref" in items: + return f"list[{items['$ref'].split('/')[-1]}]" + elif "type" in items: + return f"list[{items['type']}]" + return "array" + return type_val + elif "title" in item: + return item["title"] + return None + + +def _add_titles_to_unions(obj: Any, parent_key: str | None = None) -> None: + """Recursively add titles to union schemas (anyOf/oneOf) to help code generators infer names.""" + if isinstance(obj, dict): + # Check if this is a union schema (anyOf or oneOf) + if "anyOf" in obj or "oneOf" in obj: + union_type = "anyOf" if "anyOf" in obj else "oneOf" + union_items = obj[union_type] + + if isinstance(union_items, list) and len(union_items) > 0: + # Skip simple nullable unions (type | null) - these don't need titles + is_simple_nullable = ( + len(union_items) == 2 + and any(isinstance(item, dict) and item.get("type") == "null" for item in union_items) + and any( + isinstance(item, dict) and "type" in item and item.get("type") != "null" for item in union_items + ) + and not any( + isinstance(item, dict) and ("$ref" in item or "anyOf" in item or "oneOf" in item) + for item in union_items + ) + ) + + if is_simple_nullable: + # Remove title from simple nullable unions if it 
exists + if "title" in obj: + del obj["title"] + else: + # Add titles to individual union variants that need them + for item in union_items: + if isinstance(item, dict): + # Skip null types + if item.get("type") == "null": + continue + # Add title to complex variants (arrays with unions, nested unions, etc.) + # Also add to simple types if they're part of a complex union + needs_title = ( + "items" in item + or "anyOf" in item + or "oneOf" in item + or ("$ref" in item and "title" not in item) + ) + if needs_title and "title" not in item: + variant_title = _get_schema_title(item) + if variant_title: + item["title"] = variant_title + + # Try to infer a meaningful title from the union items for the parent + titles = [] + for item in union_items: + if isinstance(item, dict): + title = _get_schema_title(item) + if title: + titles.append(title) + + if titles: + # Create a title from the union items + unique_titles = list(dict.fromkeys(titles)) # Preserve order, remove duplicates + if len(unique_titles) <= 3: + title = " | ".join(unique_titles) + else: + title = f"{unique_titles[0]} | ... ({len(unique_titles)} variants)" + # Always set the title for unions to help code generators + # This will replace generic property titles with union-specific ones + obj["title"] = title + elif "title" not in obj and parent_key: + # Use parent key as fallback only if no title exists + obj["title"] = f"{parent_key.title()}Union" + + # Recursively process all values + for key, value in obj.items(): + _add_titles_to_unions(value, key) + elif isinstance(obj, list): + for item in obj: + _add_titles_to_unions(item, parent_key) + + +def _convert_anyof_const_to_enum(obj: Any) -> None: + """Convert anyOf with multiple const string values to a proper enum.""" + if isinstance(obj, dict): + if "anyOf" in obj: + any_of = obj["anyOf"] + if isinstance(any_of, list): + # Check if all items are const string values + const_values = [] + has_null = False + can_convert = True + for item in any_of: + if isinstance(item, dict): + if item.get("type") == "null": + has_null = True + elif item.get("type") == "string" and "const" in item: + const_values.append(item["const"]) + else: + # Not a simple const pattern, skip conversion for this anyOf + can_convert = False + break + + # If we have const values and they're all strings, convert to enum + if can_convert and const_values and len(const_values) == len(any_of) - (1 if has_null else 0): + # Convert to enum + obj["type"] = "string" + obj["enum"] = const_values + # Preserve default if present, otherwise try to get from first const item + if "default" not in obj: + for item in any_of: + if isinstance(item, dict) and "const" in item: + obj["default"] = item["const"] + break + # Remove anyOf + del obj["anyOf"] + # Handle nullable + if has_null: + obj["nullable"] = True + # Remove title if it's just "string" + if obj.get("title") == "string": + del obj["title"] + + # Recursively process all values + for value in obj.values(): + _convert_anyof_const_to_enum(value) + elif isinstance(obj, list): + for item in obj: + _convert_anyof_const_to_enum(item) + + +def _fix_schema_recursive(obj: Any) -> None: + """Recursively fix schema issues: exclusiveMinimum and null defaults.""" + if isinstance(obj, dict): + if "exclusiveMinimum" in obj and isinstance(obj["exclusiveMinimum"], int | float): + obj["minimum"] = obj.pop("exclusiveMinimum") + if "default" in obj and obj["default"] is None: + del obj["default"] + obj["nullable"] = True + for value in obj.values(): + _fix_schema_recursive(value) + elif 
isinstance(obj, list): + for item in obj: + _fix_schema_recursive(item) + + +def _clean_description(description: str) -> str: + """Remove :param, :type, :returns, and other docstring metadata from description.""" + if not description: + return description + + lines = description.split("\n") + cleaned_lines = [] + skip_until_empty = False + + for line in lines: + stripped = line.strip() + # Skip lines that start with docstring metadata markers + if stripped.startswith( + (":param", ":type", ":return", ":returns", ":raises", ":exception", ":yield", ":yields", ":cvar") + ): + skip_until_empty = True + continue + # If we're skipping and hit an empty line, resume normal processing + if skip_until_empty: + if not stripped: + skip_until_empty = False + continue + # Include the line if we're not skipping + cleaned_lines.append(line) + + # Join and strip trailing whitespace + result = "\n".join(cleaned_lines).strip() + return result + + +def _clean_schema_descriptions(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """Clean descriptions in schema definitions by removing docstring metadata.""" + if "components" not in openapi_schema or "schemas" not in openapi_schema["components"]: + return openapi_schema + + schemas = openapi_schema["components"]["schemas"] + for schema_def in schemas.values(): + if isinstance(schema_def, dict) and "description" in schema_def and isinstance(schema_def["description"], str): + schema_def["description"] = _clean_description(schema_def["description"]) + + return openapi_schema + + +def _add_extra_body_params_extension(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Add x-llama-stack-extra-body-params extension to requestBody for endpoints with ExtraBodyField parameters. + """ + if "paths" not in openapi_schema: + return openapi_schema + + from pydantic import TypeAdapter + + for path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method not in path_item: + continue + + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Check if we have extra body fields for this path/method + key = (path, method.upper()) + if key not in _extra_body_fields: + continue + + extra_body_params = _extra_body_fields[key] + + # Ensure requestBody exists + if "requestBody" not in operation: + continue + + request_body = operation["requestBody"] + if not isinstance(request_body, dict): + continue + + # Get the schema from requestBody + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + schema_ref = json_content.get("schema", {}) + + # Remove extra body fields from the schema if they exist as properties + # Handle both $ref schemas and inline schemas + if isinstance(schema_ref, dict): + if "$ref" in schema_ref: + # Schema is a reference - remove from the referenced schema + ref_path = schema_ref["$ref"] + if ref_path.startswith("#/components/schemas/"): + schema_name = ref_path.split("/")[-1] + if "components" in openapi_schema and "schemas" in openapi_schema["components"]: + schema_def = openapi_schema["components"]["schemas"].get(schema_name) + if isinstance(schema_def, dict) and "properties" in schema_def: + for param_name, _, _ in extra_body_params: + if param_name in schema_def["properties"]: + del schema_def["properties"][param_name] + # Also remove from required if present + if "required" in schema_def and param_name in schema_def["required"]: + 
schema_def["required"].remove(param_name) + elif "properties" in schema_ref: + # Schema is inline - remove directly from it + for param_name, _, _ in extra_body_params: + if param_name in schema_ref["properties"]: + del schema_ref["properties"][param_name] + # Also remove from required if present + if "required" in schema_ref and param_name in schema_ref["required"]: + schema_ref["required"].remove(param_name) + + # Build the extra body params schema + extra_params_schema = {} + for param_name, param_type, description in extra_body_params: + try: + # Generate JSON schema for the parameter type + adapter = TypeAdapter(param_type) + param_schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + + # Add description if provided + if description: + param_schema["description"] = description + + extra_params_schema[param_name] = param_schema + except Exception: + # If we can't generate schema, skip this parameter + continue + + if extra_params_schema: + # Add the extension to requestBody + if "x-llama-stack-extra-body-params" not in request_body: + request_body["x-llama-stack-extra-body-params"] = extra_params_schema + + return openapi_schema + + +def _remove_query_params_from_body_endpoints(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Remove query parameters from POST/PUT/PATCH endpoints that have a request body. + FastAPI sometimes infers parameters as query params even when they should be in the request body. + """ + if "paths" not in openapi_schema: + return openapi_schema + + body_methods = {"post", "put", "patch"} + + for _path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + for method in body_methods: + if method not in path_item: + continue + + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Check if this operation has a request body + has_request_body = "requestBody" in operation and operation["requestBody"] + + if has_request_body: + # Remove all query parameters (parameters with "in": "query") + if "parameters" in operation: + # Filter out query parameters, keep path and header parameters + operation["parameters"] = [ + param + for param in operation["parameters"] + if isinstance(param, dict) and param.get("in") != "query" + ] + # Remove the parameters key if it's now empty + if not operation["parameters"]: + del operation["parameters"] + + return openapi_schema + + +def _remove_request_bodies_from_get_endpoints(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Remove request bodies from GET endpoints and convert their parameters to query parameters. + + GET requests should never have request bodies - all parameters should be query parameters. + This function removes any requestBody that FastAPI may have incorrectly added to GET endpoints + and converts any parameters in the requestBody to query parameters. 
+ """ + if "paths" not in openapi_schema: + return openapi_schema + + for _path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + # Check GET method specifically + if "get" in path_item: + operation = path_item["get"] + if not isinstance(operation, dict): + continue + + if "requestBody" in operation: + request_body = operation["requestBody"] + # Extract parameters from requestBody and convert to query parameters + if isinstance(request_body, dict) and "content" in request_body: + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + schema = json_content.get("schema", {}) + + if "parameters" not in operation: + operation["parameters"] = [] + elif not isinstance(operation["parameters"], list): + operation["parameters"] = [] + + # If the schema has properties, convert each to a query parameter + if isinstance(schema, dict) and "properties" in schema: + for param_name, param_schema in schema["properties"].items(): + # Check if this parameter is already in the parameters list + existing_param = None + for existing in operation["parameters"]: + if isinstance(existing, dict) and existing.get("name") == param_name: + existing_param = existing + break + + if not existing_param: + # Create a new query parameter from the requestBody property + required = param_name in schema.get("required", []) + query_param = { + "name": param_name, + "in": "query", + "required": required, + "schema": param_schema, + } + # Add description if present + if "description" in param_schema: + query_param["description"] = param_schema["description"] + operation["parameters"].append(query_param) + elif isinstance(schema, dict): + # Handle direct schema (not a model with properties) + # Try to infer parameter name from schema title + param_name = schema.get("title", "").lower().replace(" ", "_") + if param_name: + # Check if this parameter is already in the parameters list + existing_param = None + for existing in operation["parameters"]: + if isinstance(existing, dict) and existing.get("name") == param_name: + existing_param = existing + break + + if not existing_param: + # Create a new query parameter from the requestBody schema + query_param = { + "name": param_name, + "in": "query", + "required": False, # Default to optional for GET requests + "schema": schema, + } + # Add description if present + if "description" in schema: + query_param["description"] = schema["description"] + operation["parameters"].append(query_param) + + # Remove request body from GET endpoint + del operation["requestBody"] + + return openapi_schema + + +def _extract_duplicate_union_types(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Extract duplicate union types to shared schema references. + + Stainless generates type names from union types based on their context, which can cause + duplicate names when the same union appears in different places. This function extracts + these duplicate unions to shared schema definitions and replaces inline definitions with + references to them. + + According to Stainless docs, when duplicate types are detected, they should be extracted + to the same ref and declared as a model. This ensures Stainless generates consistent + type names regardless of where the union is referenced. 
+ + Fixes: https://www.stainless.com/docs/reference/diagnostics#Python/DuplicateDeclaration + """ + if "components" not in openapi_schema or "schemas" not in openapi_schema["components"]: + return openapi_schema + + schemas = openapi_schema["components"]["schemas"] + + # Extract the Output union type (used in OpenAIResponseObjectWithInput-Output and ListOpenAIResponseInputItem) + output_union_schema_name = "OpenAIResponseMessageOutputUnion" + output_union_title = None + + # Get the union type from OpenAIResponseObjectWithInput-Output.input.items.anyOf + if "OpenAIResponseObjectWithInput-Output" in schemas: + schema = schemas["OpenAIResponseObjectWithInput-Output"] + if isinstance(schema, dict) and "properties" in schema: + input_prop = schema["properties"].get("input") + if isinstance(input_prop, dict) and "items" in input_prop: + items = input_prop["items"] + if isinstance(items, dict) and "anyOf" in items: + # Extract the union schema with deep copy + output_union_schema = copy.deepcopy(items["anyOf"]) + output_union_title = items.get("title", "OpenAIResponseMessageOutputUnion") + + # Collect all refs from the oneOf to detect duplicates + refs_in_oneof = set() + for item in output_union_schema: + if isinstance(item, dict) and "oneOf" in item: + oneof = item["oneOf"] + if isinstance(oneof, list): + for variant in oneof: + if isinstance(variant, dict) and "$ref" in variant: + refs_in_oneof.add(variant["$ref"]) + item["x-stainless-naming"] = "OpenAIResponseMessageOutputOneOf" + + # Remove duplicate refs from anyOf that are already in oneOf + deduplicated_schema = [] + for item in output_union_schema: + if isinstance(item, dict) and "$ref" in item: + if item["$ref"] not in refs_in_oneof: + deduplicated_schema.append(item) + else: + deduplicated_schema.append(item) + output_union_schema = deduplicated_schema + + # Create the shared schema with x-stainless-naming to ensure consistent naming + if output_union_schema_name not in schemas: + schemas[output_union_schema_name] = { + "anyOf": output_union_schema, + "title": output_union_title, + "x-stainless-naming": output_union_schema_name, + } + # Replace with reference + input_prop["items"] = {"$ref": f"#/components/schemas/{output_union_schema_name}"} + + # Replace the same union in ListOpenAIResponseInputItem.data.items.anyOf + if "ListOpenAIResponseInputItem" in schemas and output_union_schema_name in schemas: + schema = schemas["ListOpenAIResponseInputItem"] + if isinstance(schema, dict) and "properties" in schema: + data_prop = schema["properties"].get("data") + if isinstance(data_prop, dict) and "items" in data_prop: + items = data_prop["items"] + if isinstance(items, dict) and "anyOf" in items: + # Replace with reference + data_prop["items"] = {"$ref": f"#/components/schemas/{output_union_schema_name}"} + + # Extract the Input union type (used in _responses_Request.input.anyOf[1].items.anyOf) + input_union_schema_name = "OpenAIResponseMessageInputUnion" + + if "_responses_Request" in schemas: + schema = schemas["_responses_Request"] + if isinstance(schema, dict) and "properties" in schema: + input_prop = schema["properties"].get("input") + if isinstance(input_prop, dict) and "anyOf" in input_prop: + any_of = input_prop["anyOf"] + if isinstance(any_of, list) and len(any_of) > 1: + # Check the second item (index 1) which should be the array type + second_item = any_of[1] + if isinstance(second_item, dict) and "items" in second_item: + items = second_item["items"] + if isinstance(items, dict) and "anyOf" in items: + # Extract the union schema 
with deep copy + input_union_schema = copy.deepcopy(items["anyOf"]) + input_union_title = items.get("title", "OpenAIResponseMessageInputUnion") + + # Collect all refs from the oneOf to detect duplicates + refs_in_oneof = set() + for item in input_union_schema: + if isinstance(item, dict) and "oneOf" in item: + oneof = item["oneOf"] + if isinstance(oneof, list): + for variant in oneof: + if isinstance(variant, dict) and "$ref" in variant: + refs_in_oneof.add(variant["$ref"]) + item["x-stainless-naming"] = "OpenAIResponseMessageInputOneOf" + + # Remove duplicate refs from anyOf that are already in oneOf + deduplicated_schema = [] + for item in input_union_schema: + if isinstance(item, dict) and "$ref" in item: + if item["$ref"] not in refs_in_oneof: + deduplicated_schema.append(item) + else: + deduplicated_schema.append(item) + input_union_schema = deduplicated_schema + + # Create the shared schema with x-stainless-naming to ensure consistent naming + if input_union_schema_name not in schemas: + schemas[input_union_schema_name] = { + "anyOf": input_union_schema, + "title": input_union_title, + "x-stainless-naming": input_union_schema_name, + } + # Replace with reference + second_item["items"] = {"$ref": f"#/components/schemas/{input_union_schema_name}"} + + return openapi_schema + + +def _convert_multiline_strings_to_literal(obj: Any) -> Any: + """Recursively convert multi-line strings to LiteralScalarString for YAML block scalar formatting.""" + try: + from ruamel.yaml.scalarstring import LiteralScalarString + + if isinstance(obj, str) and "\n" in obj: + return LiteralScalarString(obj) + elif isinstance(obj, dict): + return {key: _convert_multiline_strings_to_literal(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [_convert_multiline_strings_to_literal(item) for item in obj] + else: + return obj + except ImportError: + return obj + + +def _write_yaml_file(file_path: Path, schema: dict[str, Any]) -> None: + """Write schema to YAML file using ruamel.yaml if available, otherwise standard yaml.""" + try: + from ruamel.yaml import YAML + + yaml_writer = YAML() + yaml_writer.default_flow_style = False + yaml_writer.sort_keys = False + yaml_writer.width = 4096 + yaml_writer.allow_unicode = True + schema = _convert_multiline_strings_to_literal(schema) + with open(file_path, "w") as f: + yaml_writer.dump(schema, f) + except ImportError: + with open(file_path, "w") as f: + yaml.dump(schema, f, default_flow_style=False, sort_keys=False) + + # Post-process to remove trailing whitespace from all lines + with open(file_path) as f: + lines = f.readlines() + + # Strip trailing whitespace from each line, preserving newlines + cleaned_lines = [line.rstrip() + "\n" if line.endswith("\n") else line.rstrip() for line in lines] + + with open(file_path, "w") as f: + f.writelines(cleaned_lines) + + +def _apply_legacy_sorting(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Temporarily match the legacy ordering from origin/main so diffs are easier to read. + Remove this once the generator output stabilizes and we no longer need legacy diffs. 
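+
+    order_mapping below sorts a dict by a priority list and appends the rest in
+    encounter order; e.g. (illustrative) priority ["b"] reorders {"a": 1, "b": 2}
+    into {"b": 2, "a": 1}.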
+ """ + + def order_mapping(data: dict[str, Any], priority: list[str]) -> OrderedDict[str, Any]: + ordered: OrderedDict[str, Any] = OrderedDict() + for key in priority: + if key in data: + ordered[key] = data[key] + for key, value in data.items(): + if key not in ordered: + ordered[key] = value + return ordered + + paths = openapi_schema.get("paths") + if isinstance(paths, dict): + openapi_schema["paths"] = order_mapping(paths, LEGACY_PATH_ORDER) + for path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + ordered_path_item = OrderedDict() + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method in path_item: + ordered_path_item[method] = order_mapping(path_item[method], LEGACY_OPERATION_KEYS) + for key, value in path_item.items(): + if key not in ordered_path_item: + if isinstance(value, dict) and key.lower() in { + "get", + "post", + "put", + "delete", + "patch", + "head", + "options", + }: + ordered_path_item[key] = order_mapping(value, LEGACY_OPERATION_KEYS) + else: + ordered_path_item[key] = value + openapi_schema["paths"][path] = ordered_path_item + + components = openapi_schema.setdefault("components", {}) + schemas = components.get("schemas") + if isinstance(schemas, dict): + components["schemas"] = order_mapping(schemas, LEGACY_SCHEMA_ORDER) + responses = components.get("responses") + if isinstance(responses, dict): + components["responses"] = order_mapping(responses, LEGACY_RESPONSE_ORDER) + + if LEGACY_TAGS: + openapi_schema["tags"] = LEGACY_TAGS + + if LEGACY_TAG_GROUPS: + openapi_schema["x-tagGroups"] = LEGACY_TAG_GROUPS + + if LEGACY_SECURITY: + openapi_schema["security"] = LEGACY_SECURITY + + return openapi_schema + + +def _fix_schema_issues(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """Fix common schema issues: exclusiveMinimum, null defaults, and add titles to unions.""" + # Convert anyOf with const values to enums across the entire schema + _convert_anyof_const_to_enum(openapi_schema) + + # Fix other schema issues and add titles to unions + if "components" in openapi_schema and "schemas" in openapi_schema["components"]: + for schema_name, schema_def in openapi_schema["components"]["schemas"].items(): + _fix_schema_recursive(schema_def) + _add_titles_to_unions(schema_def, schema_name) + return openapi_schema + + +def validate_openapi_schema(schema: dict[str, Any], schema_name: str = "OpenAPI schema") -> bool: + """ + Validate an OpenAPI schema using openapi-spec-validator. + + Args: + schema: The OpenAPI schema dictionary to validate + schema_name: Name of the schema for error reporting + + Returns: + True if valid, False otherwise + + Raises: + OpenAPIValidationError: If validation fails + """ + try: + validate_spec(schema) + print(f"{schema_name} is valid") + return True + except OpenAPISpecValidatorError as e: + print(f"{schema_name} validation failed: {e}") + return False + except Exception as e: + print(f"{schema_name} validation error: {e}") + return False diff --git a/scripts/openapi_generator/state.py b/scripts/openapi_generator/state.py new file mode 100644 index 000000000..babd1451a --- /dev/null +++ b/scripts/openapi_generator/state.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Shared state for the OpenAPI generator module. 
+""" + +from typing import Any + +from llama_stack_api import Api +from llama_stack_api.schema_utils import clear_dynamic_schema_types, register_dynamic_schema_type + +_dynamic_model_registry: dict[str, type] = {} + +# Cache for protocol methods to avoid repeated lookups +_protocol_methods_cache: dict[Api, dict[str, Any]] | None = None + +# Global dict to store extra body field information by endpoint +# Key: (path, method) tuple, Value: list of (param_name, param_type, description) tuples +_extra_body_fields: dict[tuple[str, str], list[tuple[str, type, str | None]]] = {} + + +def register_dynamic_model(name: str, model: type) -> type: + """Register and deduplicate dynamically generated request models.""" + existing = _dynamic_model_registry.get(name) + if existing is not None: + register_dynamic_schema_type(existing) + return existing + _dynamic_model_registry[name] = model + register_dynamic_schema_type(model) + return model + + +def reset_generator_state() -> None: + """Clear per-run caches so repeated generations stay deterministic.""" + _dynamic_model_registry.clear() + _extra_body_fields.clear() + clear_dynamic_schema_types() diff --git a/scripts/run_openapi_generator.sh b/scripts/run_openapi_generator.sh new file mode 100755 index 000000000..946b2886f --- /dev/null +++ b/scripts/run_openapi_generator.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +PYTHONPATH=${PYTHONPATH:-} +THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" + +set -euo pipefail + + +stack_dir=$(dirname "$THIS_DIR") +PYTHONPATH=$PYTHONPATH:$stack_dir \ + python3 -m scripts.openapi_generator "$stack_dir"/docs/static + +cp "$stack_dir"/docs/static/stainless-llama-stack-spec.yaml "$stack_dir"/client-sdks/stainless/openapi.yml diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 2a224d915..d6be7aeca 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -19,7 +19,7 @@ import httpx import yaml from fastapi import Response as FastAPIResponse -from llama_stack_api import is_unwrapped_body_param +from llama_stack.core.utils.type_inspection import is_unwrapped_body_param try: from llama_stack_client import ( @@ -42,17 +42,10 @@ from termcolor import cprint from llama_stack.core.build import print_pip_install_help from llama_stack.core.configure import parse_and_maybe_upgrade_config from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec -from llama_stack.core.request_headers import ( - PROVIDER_DATA_VAR, - request_provider_data_context, -) +from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context from llama_stack.core.resolver import ProviderRegistry from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls -from llama_stack.core.stack import ( - Stack, - get_stack_run_config_from_distro, - replace_env_vars, -) +from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars from llama_stack.core.telemetry import Telemetry from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace from llama_stack.core.utils.config import redact_sensitive_fields diff --git a/src/llama_stack/core/utils/type_inspection.py 
b/src/llama_stack/core/utils/type_inspection.py new file mode 100644 index 000000000..31e7f2328 --- /dev/null +++ b/src/llama_stack/core/utils/type_inspection.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Utility functions for type inspection and parameter handling. +""" + +import inspect +import typing +from typing import Any, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo + + +def is_unwrapped_body_param(param_type: Any) -> bool: + """ + Check if a parameter type represents an unwrapped body parameter. + An unwrapped body parameter is an Annotated type with Body(embed=False) + + This is used to determine whether request parameters should be flattened + in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior). + + Args: + param_type: The parameter type annotation to check + + Returns: + True if the parameter should be treated as an unwrapped body parameter + """ + # Check if it's Annotated with Body(embed=False) + if get_origin(param_type) is typing.Annotated: + args = get_args(param_type) + base_type = args[0] + metadata = args[1:] + + # Look for Body annotation with embed=False + # Body() returns a FieldInfo object, so we check for that type and the embed attribute + for item in metadata: + if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed: + return inspect.isclass(base_type) and issubclass(base_type, BaseModel) + + return False diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index 19b29301b..b7efcc543 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -353,8 +353,15 @@ from .safety import ( from .schema_utils import ( CallableT, ExtraBodyField, + SchemaInfo, WebMethod, + clear_dynamic_schema_types, + get_registered_schema_info, + iter_dynamic_schema_types, + iter_json_schema_types, + iter_registered_schema_types, json_schema_type, + register_dynamic_schema_type, register_schema, webmethod, ) @@ -388,27 +395,6 @@ from .shields import ( ) # Import from strong_typing -from .strong_typing.core import JsonType -from .strong_typing.docstring import Docstring, parse_type -from .strong_typing.inspection import ( - get_signature, - is_generic_list, - is_type_optional, - is_type_union, - is_unwrapped_body_param, - unwrap_generic_list, - unwrap_optional_type, - unwrap_union_types, -) -from .strong_typing.name import python_type_to_name -from .strong_typing.schema import ( - JsonSchemaGenerator, - Schema, - SchemaOptions, - StrictJsonType, - get_schema_identifier, -) -from .strong_typing.serialization import json_dump_string, object_to_json from .tools import ( ListToolDefsResponse, ListToolGroupsResponse, @@ -537,6 +523,7 @@ __all__ = [ "ExtraBodyField", "Files", "Fp8QuantizationConfig", + "clear_dynamic_schema_types", "get_schema_identifier", "get_signature", "GrammarResponseFormat", @@ -557,6 +544,10 @@ __all__ = [ "is_type_optional", "is_type_union", "is_unwrapped_body_param", + "iter_dynamic_schema_types", + "iter_json_schema_types", + "iter_registered_schema_types", + "get_registered_schema_info", "Job", "JobStatus", "json_dump_string", @@ -759,6 +750,7 @@ __all__ = [ "RAGQueryGeneratorConfig", "RAGQueryResult", "RAGSearchMode", + "register_dynamic_schema_type", "register_schema", "RLHFAlgorithm", "RRFRanker", @@ -796,6 +788,7 @@ __all__ = [ 
"ScoringResult", "ScoringResultRow", "Schema", + "SchemaInfo", "SchemaOptions", "SearchRankingOptions", "Shield", diff --git a/src/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py index e9ac3a8b8..fdb2ccad4 100644 --- a/src/llama_stack_api/benchmarks.py +++ b/src/llama_stack_api/benchmarks.py @@ -48,6 +48,7 @@ class BenchmarkInput(CommonBenchmarkFields, BaseModel): provider_benchmark_id: str | None = None +@json_schema_type class ListBenchmarksResponse(BaseModel): data: list[Benchmark] diff --git a/src/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py index 76d787078..6d707aa8e 100644 --- a/src/llama_stack_api/datasets.py +++ b/src/llama_stack_api/datasets.py @@ -136,6 +136,7 @@ class DatasetInput(CommonDatasetFields, BaseModel): dataset_id: str +@json_schema_type class ListDatasetsResponse(BaseModel): """Response from listing datasets. diff --git a/src/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py index 8326e9e6b..b9e5a6843 100644 --- a/src/llama_stack_api/inspect.py +++ b/src/llama_stack_api/inspect.py @@ -54,6 +54,7 @@ class VersionInfo(BaseModel): version: str +@json_schema_type class ListRoutesResponse(BaseModel): """Response containing a list of all available API routes. diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py index 833864ec2..98c16b6c2 100644 --- a/src/llama_stack_api/models.py +++ b/src/llama_stack_api/models.py @@ -100,6 +100,7 @@ class OpenAIModel(BaseModel): custom_metadata: dict[str, Any] | None = None +@json_schema_type class OpenAIListModelsResponse(BaseModel): data: list[OpenAIModel] diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index 2dd73e90a..952418f1c 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -1316,6 +1316,7 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +@json_schema_type class ListOpenAIResponseInputItem(BaseModel): """List container for OpenAI response input items. diff --git a/src/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py index 0cc9277d9..505c8bfd7 100644 --- a/src/llama_stack_api/post_training.py +++ b/src/llama_stack_api/post_training.py @@ -236,6 +236,7 @@ class PostTrainingRLHFRequest(BaseModel): logger_config: dict[str, Any] +@json_schema_type class PostTrainingJob(BaseModel): job_uuid: str @@ -265,6 +266,7 @@ class PostTrainingJobStatusResponse(BaseModel): checkpoints: list[Checkpoint] = Field(default_factory=list) +@json_schema_type class ListPostTrainingJobsResponse(BaseModel): data: list[PostTrainingJob] diff --git a/src/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py index 651d03e61..8562e4704 100644 --- a/src/llama_stack_api/prompts.py +++ b/src/llama_stack_api/prompts.py @@ -85,6 +85,7 @@ class Prompt(BaseModel): return f"pmpt_{hex_string}" +@json_schema_type class ListPromptsResponse(BaseModel): """Response model to list prompts.""" diff --git a/src/llama_stack_api/providers.py b/src/llama_stack_api/providers.py index 5b555b82f..88c66f261 100644 --- a/src/llama_stack_api/providers.py +++ b/src/llama_stack_api/providers.py @@ -31,6 +31,7 @@ class ProviderInfo(BaseModel): health: HealthResponse +@json_schema_type class ListProvidersResponse(BaseModel): """Response containing a list of all available providers. 
diff --git a/src/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py index 8444d2a34..162ef63fb 100644 --- a/src/llama_stack_api/schema_utils.py +++ b/src/llama_stack_api/schema_utils.py @@ -4,11 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections.abc import Callable +from collections.abc import Callable, Iterable from dataclasses import dataclass -from typing import Any, TypeVar - -from .strong_typing.schema import json_schema_type, register_schema # noqa: F401 +from typing import Any, Literal, TypeVar class ExtraBodyField[T]: @@ -48,6 +46,98 @@ class ExtraBodyField[T]: self.description = description +SchemaSource = Literal["json_schema_type", "registered_schema", "dynamic_schema"] + + +@dataclass(frozen=True) +class SchemaInfo: + """Metadata describing a schema entry exposed to OpenAPI generation.""" + + name: str + type: Any + source: SchemaSource + + +_json_schema_types: dict[type, SchemaInfo] = {} + + +def json_schema_type(cls): + """ + Decorator to mark a Pydantic model for top-level component registration. + + Models marked with this decorator will be registered as top-level components + in the OpenAPI schema, while unmarked models will be inlined. + + This provides control over schema registration to avoid unnecessary indirection + for simple one-off types while keeping complex reusable types as components. + """ + cls._llama_stack_schema_type = True + schema_name = getattr(cls, "__name__", f"Anonymous_{id(cls)}") + cls._llama_stack_schema_name = schema_name + _json_schema_types.setdefault(cls, SchemaInfo(name=schema_name, type=cls, source="json_schema_type")) + return cls + + +# Global registries for schemas discoverable by the generator +_registered_schemas: dict[Any, SchemaInfo] = {} +_dynamic_schema_types: dict[type, SchemaInfo] = {} + + +def register_schema(schema_type, name: str | None = None): + """ + Register a schema type for top-level component registration. + + This replicates the behavior of strong_typing's register_schema function. + It's used for union types and other complex types that should appear as + top-level components in the OpenAPI schema. + + Args: + schema_type: The type to register (e.g., union types, Annotated types) + name: Optional name for the schema in the OpenAPI spec. If not provided, + uses the type's __name__ or a generated name. 
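+
+    Example (as used in openai_responses.py):
+        register_schema(OpenAIResponseInput, name="OpenAIResponseInput")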
+ """ + if name is None: + name = getattr(schema_type, "__name__", f"Anonymous_{id(schema_type)}") + + # Store the registration information in a global registry + # since union types don't allow setting attributes + _registered_schemas[schema_type] = SchemaInfo(name=name, type=schema_type, source="registered_schema") + + return schema_type + + +def get_registered_schema_info(schema_type: Any) -> SchemaInfo | None: + """Return the registration metadata for a schema type if present.""" + return _registered_schemas.get(schema_type) + + +def iter_registered_schema_types() -> Iterable[SchemaInfo]: + """Iterate over all explicitly registered schema entries.""" + return tuple(_registered_schemas.values()) + + +def iter_json_schema_types() -> Iterable[type]: + """Iterate over all Pydantic models decorated with @json_schema_type.""" + return tuple(info.type for info in _json_schema_types.values()) + + +def iter_dynamic_schema_types() -> Iterable[type]: + """Iterate over dynamic models registered at generation time.""" + return tuple(info.type for info in _dynamic_schema_types.values()) + + +def register_dynamic_schema_type(schema_type: type, name: str | None = None) -> type: + """Register a dynamic model generated at runtime for schema inclusion.""" + schema_name = name if name is not None else getattr(schema_type, "__name__", f"Anonymous_{id(schema_type)}") + _dynamic_schema_types[schema_type] = SchemaInfo(name=schema_name, type=schema_type, source="dynamic_schema") + return schema_type + + +def clear_dynamic_schema_types() -> None: + """Clear dynamic schema registrations.""" + _dynamic_schema_types.clear() + + @dataclass class WebMethod: level: str | None = None diff --git a/src/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py index f75336e54..12051c20c 100644 --- a/src/llama_stack_api/scoring_functions.py +++ b/src/llama_stack_api/scoring_functions.py @@ -155,6 +155,7 @@ class ScoringFnInput(CommonScoringFnFields, BaseModel): provider_scoring_fn_id: str | None = None +@json_schema_type class ListScoringFunctionsResponse(BaseModel): data: list[ScoringFn] diff --git a/src/llama_stack_api/shields.py b/src/llama_stack_api/shields.py index 2aeb83333..19e412a5a 100644 --- a/src/llama_stack_api/shields.py +++ b/src/llama_stack_api/shields.py @@ -43,6 +43,7 @@ class ShieldInput(CommonShieldFields): provider_shield_id: str | None = None +@json_schema_type class ListShieldsResponse(BaseModel): data: list[Shield] diff --git a/src/llama_stack_api/strong_typing/__init__.py b/src/llama_stack_api/strong_typing/__init__.py deleted file mode 100644 index d832dcf6f..000000000 --- a/src/llama_stack_api/strong_typing/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -Provides auxiliary services for working with Python type annotations, converting typed data to and from JSON, -and generating a JSON schema for a complex type. 
-""" - -__version__ = "0.3.4" -__author__ = "Levente Hunyadi" -__copyright__ = "Copyright 2021-2024, Levente Hunyadi" -__license__ = "MIT" -__maintainer__ = "Levente Hunyadi" -__status__ = "Production" diff --git a/src/llama_stack_api/strong_typing/auxiliary.py b/src/llama_stack_api/strong_typing/auxiliary.py deleted file mode 100644 index eb067b38b..000000000 --- a/src/llama_stack_api/strong_typing/auxiliary.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import dataclasses -import sys -from collections.abc import Callable -from dataclasses import is_dataclass -from typing import TypeVar, overload - -if sys.version_info >= (3, 9): - from typing import Annotated as Annotated -else: - from typing import Annotated as Annotated - -if sys.version_info >= (3, 10): - from typing import TypeAlias as TypeAlias -else: - from typing import TypeAlias as TypeAlias - -if sys.version_info >= (3, 11): - from typing import dataclass_transform as dataclass_transform -else: - from typing import dataclass_transform as dataclass_transform - -T = TypeVar("T") - - -def _compact_dataclass_repr(obj: object) -> str: - """ - Compact data-class representation where positional arguments are used instead of keyword arguments. - - :param obj: A data-class object. - :returns: A string that matches the pattern `Class(arg1, arg2, ...)`. - """ - - if is_dataclass(obj): - arglist = ", ".join(repr(getattr(obj, field.name)) for field in dataclasses.fields(obj)) - return f"{obj.__class__.__name__}({arglist})" - else: - return obj.__class__.__name__ - - -class CompactDataClass: - "A data class whose repr() uses positional rather than keyword arguments." - - def __repr__(self) -> str: - return _compact_dataclass_repr(self) - - -@overload -def typeannotation(cls: type[T], /) -> type[T]: ... - - -@overload -def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[type[T]], type[T]]: ... - - -@dataclass_transform(eq_default=True, order_default=False) -def typeannotation( - cls: type[T] | None = None, *, eq: bool = True, order: bool = False -) -> type[T] | Callable[[type[T]], type[T]]: - """ - Returns the same class as was passed in, with dunder methods added based on the fields defined in the class. - - :param cls: The data-class type to transform into a type annotation. - :param eq: Whether to generate functions to support equality comparison. - :param order: Whether to generate functions to support ordering. - :returns: A data-class type, or a wrapper for data-class types. - """ - - def wrap(cls: type[T]) -> type[T]: - # mypy fails to equate bound-y functions (first argument interpreted as - # the bound object) with class methods, hence the `ignore` directive. 
- cls.__repr__ = _compact_dataclass_repr # type: ignore[method-assign] - if not dataclasses.is_dataclass(cls): - cls = dataclasses.dataclass( # type: ignore[call-overload] - cls, - init=True, - repr=False, - eq=eq, - order=order, - unsafe_hash=False, - frozen=True, - ) - return cls - - # see if decorator is used as @typeannotation or @typeannotation() - if cls is None: - # called with parentheses - return wrap - else: - # called without parentheses - return wrap(cls) - - -@typeannotation -class Alias: - "Alternative name of a property, typically used in JSON serialization." - - name: str - - -@typeannotation -class Signed: - "Signedness of an integer type." - - is_signed: bool - - -@typeannotation -class Storage: - "Number of bytes the binary representation of an integer type takes, e.g. 4 bytes for an int32." - - bytes: int - - -@typeannotation -class IntegerRange: - "Minimum and maximum value of an integer. The range is inclusive." - - minimum: int - maximum: int - - -@typeannotation -class Precision: - "Precision of a floating-point value." - - significant_digits: int - decimal_digits: int = 0 - - @property - def integer_digits(self) -> int: - return self.significant_digits - self.decimal_digits - - -@typeannotation -class TimePrecision: - """ - Precision of a timestamp or time interval. - - :param decimal_digits: Number of fractional digits retained in the sub-seconds field for a timestamp. - """ - - decimal_digits: int = 0 - - -@typeannotation -class Length: - "Exact length of a string." - - value: int - - -@typeannotation -class MinLength: - "Minimum length of a string." - - value: int - - -@typeannotation -class MaxLength: - "Maximum length of a string." - - value: int - - -@typeannotation -class SpecialConversion: - "Indicates that the annotated type is subject to custom conversion rules." - - -int8: TypeAlias = Annotated[int, Signed(True), Storage(1), IntegerRange(-128, 127)] -int16: TypeAlias = Annotated[int, Signed(True), Storage(2), IntegerRange(-32768, 32767)] -int32: TypeAlias = Annotated[ - int, - Signed(True), - Storage(4), - IntegerRange(-2147483648, 2147483647), -] -int64: TypeAlias = Annotated[ - int, - Signed(True), - Storage(8), - IntegerRange(-9223372036854775808, 9223372036854775807), -] - -uint8: TypeAlias = Annotated[int, Signed(False), Storage(1), IntegerRange(0, 255)] -uint16: TypeAlias = Annotated[int, Signed(False), Storage(2), IntegerRange(0, 65535)] -uint32: TypeAlias = Annotated[ - int, - Signed(False), - Storage(4), - IntegerRange(0, 4294967295), -] -uint64: TypeAlias = Annotated[ - int, - Signed(False), - Storage(8), - IntegerRange(0, 18446744073709551615), -] - -float32: TypeAlias = Annotated[float, Storage(4)] -float64: TypeAlias = Annotated[float, Storage(8)] - -# maps globals of type Annotated[T, ...] defined in this module to their string names -_auxiliary_types: dict[object, str] = {} -module = sys.modules[__name__] -for var in dir(module): - typ = getattr(module, var) - if getattr(typ, "__metadata__", None) is not None: - # type is Annotated[T, ...] - _auxiliary_types[typ] = var - - -def get_auxiliary_format(data_type: object) -> str | None: - "Returns the JSON format string corresponding to an auxiliary type." - - return _auxiliary_types.get(data_type) diff --git a/src/llama_stack_api/strong_typing/classdef.py b/src/llama_stack_api/strong_typing/classdef.py deleted file mode 100644 index e54e3a9d6..000000000 --- a/src/llama_stack_api/strong_typing/classdef.py +++ /dev/null @@ -1,440 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import copy -import dataclasses -import datetime -import decimal -import enum -import ipaddress -import math -import re -import sys -import types -import typing -import uuid -from dataclasses import dataclass -from typing import Any, Literal, TypeVar, Union - -from .auxiliary import ( - Alias, - Annotated, - MaxLength, - Precision, - float32, - float64, - int16, - int32, - int64, -) -from .core import JsonType, Schema -from .docstring import Docstring, DocstringParam -from .inspection import TypeLike -from .serialization import json_to_object, object_to_json - -T = TypeVar("T") - - -@dataclass -class JsonSchemaNode: - title: str | None - description: str | None - - -@dataclass -class JsonSchemaType(JsonSchemaNode): - type: str - format: str | None - - -@dataclass -class JsonSchemaBoolean(JsonSchemaType): - type: Literal["boolean"] - const: bool | None - default: bool | None - examples: list[bool] | None - - -@dataclass -class JsonSchemaInteger(JsonSchemaType): - type: Literal["integer"] - const: int | None - default: int | None - examples: list[int] | None - enum: list[int] | None - minimum: int | None - maximum: int | None - - -@dataclass -class JsonSchemaNumber(JsonSchemaType): - type: Literal["number"] - const: float | None - default: float | None - examples: list[float] | None - minimum: float | None - maximum: float | None - exclusiveMinimum: float | None - exclusiveMaximum: float | None - multipleOf: float | None - - -@dataclass -class JsonSchemaString(JsonSchemaType): - type: Literal["string"] - const: str | None - default: str | None - examples: list[str] | None - enum: list[str] | None - minLength: int | None - maxLength: int | None - - -@dataclass -class JsonSchemaArray(JsonSchemaType): - type: Literal["array"] - items: "JsonSchemaAny" - - -@dataclass -class JsonSchemaObject(JsonSchemaType): - type: Literal["object"] - properties: dict[str, "JsonSchemaAny"] | None - additionalProperties: bool | None - required: list[str] | None - - -@dataclass -class JsonSchemaRef(JsonSchemaNode): - ref: Annotated[str, Alias("$ref")] - - -@dataclass -class JsonSchemaAllOf(JsonSchemaNode): - allOf: list["JsonSchemaAny"] - - -@dataclass -class JsonSchemaAnyOf(JsonSchemaNode): - anyOf: list["JsonSchemaAny"] - - -@dataclass -class Discriminator: - propertyName: str - mapping: dict[str, str] - - -@dataclass -class JsonSchemaOneOf(JsonSchemaNode): - oneOf: list["JsonSchemaAny"] - discriminator: Discriminator | None - - -JsonSchemaAny = Union[ - JsonSchemaRef, - JsonSchemaBoolean, - JsonSchemaInteger, - JsonSchemaNumber, - JsonSchemaString, - JsonSchemaArray, - JsonSchemaObject, - JsonSchemaOneOf, -] - - -@dataclass -class JsonSchemaTopLevelObject(JsonSchemaObject): - schema: Annotated[str, Alias("$schema")] - definitions: dict[str, JsonSchemaAny] | None - - -def integer_range_to_type(min_value: float, max_value: float) -> type: - if min_value >= -(2**15) and max_value < 2**15: - return int16 - elif min_value >= -(2**31) and max_value < 2**31: - return int32 - else: - return int64 - - -def enum_safe_name(name: str) -> str: - name = re.sub(r"\W", "_", name) - is_dunder = name.startswith("__") - is_sunder = name.startswith("_") and name.endswith("_") - if is_dunder or is_sunder: # provide an alternative for dunder and sunder names - name = f"v{name}" - return name - - -def enum_values_to_type( - module: types.ModuleType, - name: str, - values: dict[str, 
Any], - title: str | None = None, - description: str | None = None, -) -> type[enum.Enum]: - enum_class: type[enum.Enum] = enum.Enum(name, values) # type: ignore - - # assign the newly created type to the same module where the defining class is - enum_class.__module__ = module.__name__ - enum_class.__doc__ = str(Docstring(short_description=title, long_description=description)) - setattr(module, name, enum_class) - - return enum.unique(enum_class) - - -def schema_to_type(schema: Schema, *, module: types.ModuleType, class_name: str) -> TypeLike: - """ - Creates a Python type from a JSON schema. - - :param schema: The JSON schema that the types would correspond to. - :param module: The module in which to create the new types. - :param class_name: The name assigned to the top-level class. - """ - - top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, schema)) - if top_node.definitions is not None: - for type_name, type_node in top_node.definitions.items(): - type_def = node_to_typedef(module, type_name, type_node) - if type_def.default is not dataclasses.MISSING: - raise TypeError("disallowed: `default` for top-level type definitions") - - type_def.type.__module__ = module.__name__ - setattr(module, type_name, type_def.type) - - return node_to_typedef(module, class_name, top_node).type - - -@dataclass -class TypeDef: - type: TypeLike - default: Any = dataclasses.MISSING - - -def json_to_value(target_type: TypeLike, data: JsonType) -> Any: - if data is not None: - return json_to_object(target_type, data) - else: - return dataclasses.MISSING - - -def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode) -> TypeDef: - if isinstance(node, JsonSchemaRef): - match_obj = re.match(r"^#/definitions/(\w+)$", node.ref) - if not match_obj: - raise ValueError(f"invalid reference: {node.ref}") - - type_name = match_obj.group(1) - return TypeDef(getattr(module, type_name), dataclasses.MISSING) - - elif isinstance(node, JsonSchemaBoolean): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - default = json_to_value(bool, node.default) - return TypeDef(bool, default) - - elif isinstance(node, JsonSchemaInteger): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - integer_type: TypeLike - if node.format == "int16": - integer_type = int16 - elif node.format == "int32": - integer_type = int32 - elif node.format == "int64": - integer_type = int64 - else: - if node.enum is not None: - integer_type = integer_range_to_type(min(node.enum), max(node.enum)) - elif node.minimum is not None and node.maximum is not None: - integer_type = integer_range_to_type(node.minimum, node.maximum) - else: - integer_type = int - - default = json_to_value(integer_type, node.default) - return TypeDef(integer_type, default) - - elif isinstance(node, JsonSchemaNumber): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - number_type: TypeLike - if node.format == "float32": - number_type = float32 - elif node.format == "float64": - number_type = float64 - else: - if ( - node.exclusiveMinimum is not None - and node.exclusiveMaximum is not None - and node.exclusiveMinimum == -node.exclusiveMaximum - ): - integer_digits = round(math.log10(node.exclusiveMaximum)) - else: - integer_digits = None - - if node.multipleOf is not None: - decimal_digits = -round(math.log10(node.multipleOf)) - else: - decimal_digits = None - - if integer_digits is not None and decimal_digits 
is not None: - number_type = Annotated[ - decimal.Decimal, - Precision(integer_digits + decimal_digits, decimal_digits), - ] - else: - number_type = float - - default = json_to_value(number_type, node.default) - return TypeDef(number_type, default) - - elif isinstance(node, JsonSchemaString): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - string_type: TypeLike - if node.format == "date-time": - string_type = datetime.datetime - elif node.format == "uuid": - string_type = uuid.UUID - elif node.format == "ipv4": - string_type = ipaddress.IPv4Address - elif node.format == "ipv6": - string_type = ipaddress.IPv6Address - - elif node.enum is not None: - string_type = enum_values_to_type( - module, - context, - {enum_safe_name(e): e for e in node.enum}, - title=node.title, - description=node.description, - ) - - elif node.maxLength is not None: - string_type = Annotated[str, MaxLength(node.maxLength)] - else: - string_type = str - - default = json_to_value(string_type, node.default) - return TypeDef(string_type, default) - - elif isinstance(node, JsonSchemaArray): - type_def = node_to_typedef(module, context, node.items) - if type_def.default is not dataclasses.MISSING: - raise TypeError("disallowed: `default` for array element type") - list_type = list[(type_def.type,)] # type: ignore - return TypeDef(list_type, dataclasses.MISSING) - - elif isinstance(node, JsonSchemaObject): - if node.properties is None: - return TypeDef(JsonType, dataclasses.MISSING) - - if node.additionalProperties is None or node.additionalProperties is not False: - raise TypeError("expected: `additionalProperties` equals `false`") - - required = node.required if node.required is not None else [] - - class_name = context - - fields: list[tuple[str, Any, dataclasses.Field]] = [] - params: dict[str, DocstringParam] = {} - for prop_name, prop_node in node.properties.items(): - type_def = node_to_typedef(module, f"{class_name}__{prop_name}", prop_node) - if prop_name in required: - prop_type = type_def.type - else: - prop_type = Union[(None, type_def.type)] - fields.append((prop_name, prop_type, dataclasses.field(default=type_def.default))) - prop_desc = prop_node.title or prop_node.description - if prop_desc is not None: - params[prop_name] = DocstringParam(prop_name, prop_desc) - - fields.sort(key=lambda t: t[2].default is not dataclasses.MISSING) - if sys.version_info >= (3, 12): - class_type = dataclasses.make_dataclass(class_name, fields, module=module.__name__) - else: - class_type = dataclasses.make_dataclass(class_name, fields, namespace={"__module__": module.__name__}) - class_type.__doc__ = str( - Docstring( - short_description=node.title, - long_description=node.description, - params=params, - ) - ) - setattr(module, class_name, class_type) - return TypeDef(class_type, dataclasses.MISSING) - - elif isinstance(node, JsonSchemaOneOf): - union_defs = tuple(node_to_typedef(module, context, n) for n in node.oneOf) - if any(d.default is not dataclasses.MISSING for d in union_defs): - raise TypeError("disallowed: `default` for union member type") - union_types = tuple(d.type for d in union_defs) - return TypeDef(Union[union_types], dataclasses.MISSING) - - raise NotImplementedError() - - -@dataclass -class SchemaFlatteningOptions: - qualified_names: bool = False - recursive: bool = False - - -def flatten_schema(schema: Schema, *, options: SchemaFlatteningOptions | None = None) -> Schema: - top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, 
schema)) - flattener = SchemaFlattener(options) - obj = flattener.flatten(top_node) - return typing.cast(Schema, object_to_json(obj)) - - -class SchemaFlattener: - options: SchemaFlatteningOptions - - def __init__(self, options: SchemaFlatteningOptions | None = None) -> None: - self.options = options or SchemaFlatteningOptions() - - def flatten(self, source_node: JsonSchemaObject) -> JsonSchemaObject: - if source_node.type != "object": - return source_node - - source_props = source_node.properties or {} - target_props: dict[str, JsonSchemaAny] = {} - - source_reqs = source_node.required or [] - target_reqs: list[str] = [] - - for name, prop in source_props.items(): - if not isinstance(prop, JsonSchemaObject): - target_props[name] = prop - if name in source_reqs: - target_reqs.append(name) - continue - - if self.options.recursive: - obj = self.flatten(prop) - else: - obj = prop - if obj.properties is not None: - if self.options.qualified_names: - target_props.update((f"{name}.{n}", p) for n, p in obj.properties.items()) - else: - target_props.update(obj.properties.items()) - if obj.required is not None: - if self.options.qualified_names: - target_reqs.extend(f"{name}.{n}" for n in obj.required) - else: - target_reqs.extend(obj.required) - - target_node = copy.copy(source_node) - target_node.properties = target_props or None - target_node.additionalProperties = False - target_node.required = target_reqs or None - return target_node diff --git a/src/llama_stack_api/strong_typing/core.py b/src/llama_stack_api/strong_typing/core.py deleted file mode 100644 index 5f3764aeb..000000000 --- a/src/llama_stack_api/strong_typing/core.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -from typing import Union - - -class JsonObject: - "Placeholder type for an unrestricted JSON object." - - -class JsonArray: - "Placeholder type for an unrestricted JSON array." - - -# a JSON type with possible `null` values -JsonType = Union[ - None, - bool, - int, - float, - str, - dict[str, "JsonType"], - list["JsonType"], -] - -# a JSON type that cannot contain `null` values -StrictJsonType = Union[ - bool, - int, - float, - str, - dict[str, "StrictJsonType"], - list["StrictJsonType"], -] - -# a meta-type that captures the object type in a JSON schema -Schema = dict[str, JsonType] diff --git a/src/llama_stack_api/strong_typing/deserializer.py b/src/llama_stack_api/strong_typing/deserializer.py deleted file mode 100644 index 58dfe53a4..000000000 --- a/src/llama_stack_api/strong_typing/deserializer.py +++ /dev/null @@ -1,872 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. 
-
-:see: https://github.com/hunyadi/strong_typing
-"""
-
-import abc
-import base64
-import dataclasses
-import datetime
-import enum
-import inspect
-import ipaddress
-import sys
-import typing
-import uuid
-from collections.abc import Callable
-from types import ModuleType
-from typing import (
-    Any,
-    Generic,
-    Literal,
-    NamedTuple,
-    Optional,
-    TypeVar,
-    Union,
-)
-
-from .core import JsonType
-from .exception import JsonKeyError, JsonTypeError, JsonValueError
-from .inspection import (
-    TypeLike,
-    create_object,
-    enum_value_types,
-    evaluate_type,
-    get_class_properties,
-    get_class_property,
-    get_resolved_hints,
-    is_dataclass_instance,
-    is_dataclass_type,
-    is_named_tuple_type,
-    is_type_annotated,
-    is_type_literal,
-    is_type_optional,
-    unwrap_annotated_type,
-    unwrap_literal_values,
-    unwrap_optional_type,
-)
-from .mapping import python_field_to_json_property
-from .name import python_type_to_str
-
-E = TypeVar("E", bound=enum.Enum)
-T = TypeVar("T")
-R = TypeVar("R")
-K = TypeVar("K")
-V = TypeVar("V")
-
-
-class Deserializer(abc.ABC, Generic[T]):
-    "Parses a JSON value into a Python type."
-
-    def build(self, context: ModuleType | None) -> None:
-        """
-        Creates auxiliary parsers that this parser is depending on.
-
-        :param context: A module context for evaluating types specified as a string.
-        """
-
-    @abc.abstractmethod
-    def parse(self, data: JsonType) -> T:
-        """
-        Parses a JSON value into a Python type.
-
-        :param data: The JSON value to de-serialize.
-        :returns: The Python object that the JSON value de-serializes to.
-        """
-
-
-class NoneDeserializer(Deserializer[None]):
-    "Parses JSON `null` values into Python `None`."
-
-    def parse(self, data: JsonType) -> None:
-        if data is not None:
-            raise JsonTypeError(f"`None` type expects JSON `null` but instead received: {data}")
-        return None
-
-
-class BoolDeserializer(Deserializer[bool]):
-    "Parses JSON `boolean` values into Python `bool` type."
-
-    def parse(self, data: JsonType) -> bool:
-        if not isinstance(data, bool):
-            raise JsonTypeError(f"`bool` type expects JSON `boolean` data but instead received: {data}")
-        return bool(data)
-
-
-class IntDeserializer(Deserializer[int]):
-    "Parses JSON `number` values into Python `int` type."
-
-    def parse(self, data: JsonType) -> int:
-        if not isinstance(data, int):
-            raise JsonTypeError(f"`int` type expects integer data as JSON `number` but instead received: {data}")
-        return int(data)
-
-
-class FloatDeserializer(Deserializer[float]):
-    "Parses JSON `number` values into Python `float` type."
-
-    def parse(self, data: JsonType) -> float:
-        if not isinstance(data, float) and not isinstance(data, int):
-            raise JsonTypeError(f"`float` type expects data as JSON `number` but instead received: {data}")
-        return float(data)
-
-
-class StringDeserializer(Deserializer[str]):
-    "Parses JSON `string` values into Python `str` type."
-
-    def parse(self, data: JsonType) -> str:
-        if not isinstance(data, str):
-            raise JsonTypeError(f"`str` type expects JSON `string` data but instead received: {data}")
-        return str(data)
-
-
-class BytesDeserializer(Deserializer[bytes]):
-    "Parses JSON `string` values of Base64-encoded strings into Python `bytes` type."
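    # [Editor's note: illustrative sketch, not part of the original file.]
    # The scalar deserializers validate JSON types rather than coercing them,
    # e.g. (assuming this module is importable):
    #
    #   >>> BytesDeserializer().parse("aGVsbG8=")   # Base64-encoded "hello"
    #   b'hello'
    #   >>> StringDeserializer().parse(42)          # raises JsonTypeError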
- - def parse(self, data: JsonType) -> bytes: - if not isinstance(data, str): - raise JsonTypeError(f"`bytes` type expects JSON `string` data but instead received: {data}") - return base64.b64decode(data, validate=True) - - -class DateTimeDeserializer(Deserializer[datetime.datetime]): - "Parses JSON `string` values representing timestamps in ISO 8601 format to Python `datetime` with time zone." - - def parse(self, data: JsonType) -> datetime.datetime: - if not isinstance(data, str): - raise JsonTypeError(f"`datetime` type expects JSON `string` data but instead received: {data}") - - if data.endswith("Z"): - data = f"{data[:-1]}+00:00" # Python's isoformat() does not support military time zones like "Zulu" for UTC - timestamp = datetime.datetime.fromisoformat(data) - if timestamp.tzinfo is None: - raise JsonValueError(f"timestamp lacks explicit time zone designator: {data}") - return timestamp - - -class DateDeserializer(Deserializer[datetime.date]): - "Parses JSON `string` values representing dates in ISO 8601 format to Python `date` type." - - def parse(self, data: JsonType) -> datetime.date: - if not isinstance(data, str): - raise JsonTypeError(f"`date` type expects JSON `string` data but instead received: {data}") - - return datetime.date.fromisoformat(data) - - -class TimeDeserializer(Deserializer[datetime.time]): - "Parses JSON `string` values representing time instances in ISO 8601 format to Python `time` type with time zone." - - def parse(self, data: JsonType) -> datetime.time: - if not isinstance(data, str): - raise JsonTypeError(f"`time` type expects JSON `string` data but instead received: {data}") - - return datetime.time.fromisoformat(data) - - -class UUIDDeserializer(Deserializer[uuid.UUID]): - "Parses JSON `string` values of UUID strings into Python `uuid.UUID` type." - - def parse(self, data: JsonType) -> uuid.UUID: - if not isinstance(data, str): - raise JsonTypeError(f"`UUID` type expects JSON `string` data but instead received: {data}") - return uuid.UUID(data) - - -class IPv4Deserializer(Deserializer[ipaddress.IPv4Address]): - "Parses JSON `string` values of IPv4 address strings into Python `ipaddress.IPv4Address` type." - - def parse(self, data: JsonType) -> ipaddress.IPv4Address: - if not isinstance(data, str): - raise JsonTypeError(f"`IPv4Address` type expects JSON `string` data but instead received: {data}") - return ipaddress.IPv4Address(data) - - -class IPv6Deserializer(Deserializer[ipaddress.IPv6Address]): - "Parses JSON `string` values of IPv6 address strings into Python `ipaddress.IPv6Address` type." - - def parse(self, data: JsonType) -> ipaddress.IPv6Address: - if not isinstance(data, str): - raise JsonTypeError(f"`IPv6Address` type expects JSON `string` data but instead received: {data}") - return ipaddress.IPv6Address(data) - - -class ListDeserializer(Deserializer[list[T]]): - "Recursively de-serializes a JSON array into a Python `list`." 
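    # [Editor's sketch; assumes the `create_deserializer` entry point defined
    # later in this file.] Container parsers recurse into their element type:
    #
    #   >>> create_deserializer(list[int]).parse([1, 2, 3])
    #   [1, 2, 3]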
-
-    item_type: type[T]
-    item_parser: Deserializer
-
-    def __init__(self, item_type: type[T]) -> None:
-        self.item_type = item_type
-
-    def build(self, context: ModuleType | None) -> None:
-        self.item_parser = _get_deserializer(self.item_type, context)
-
-    def parse(self, data: JsonType) -> list[T]:
-        if not isinstance(data, list):
-            type_name = python_type_to_str(self.item_type)
-            raise JsonTypeError(f"type `List[{type_name}]` expects JSON `array` data but instead received: {data}")
-
-        return [self.item_parser.parse(item) for item in data]
-
-
-class DictDeserializer(Deserializer[dict[K, V]]):
-    "Recursively de-serializes a JSON object into a Python `dict`."
-
-    key_type: type[K]
-    value_type: type[V]
-    value_parser: Deserializer[V]
-
-    def __init__(self, key_type: type[K], value_type: type[V]) -> None:
-        self.key_type = key_type
-        self.value_type = value_type
-        self._check_key_type()
-
-    def build(self, context: ModuleType | None) -> None:
-        self.value_parser = _get_deserializer(self.value_type, context)
-
-    def _check_key_type(self) -> None:
-        if self.key_type is str:
-            return
-
-        if issubclass(self.key_type, enum.Enum):
-            value_types = enum_value_types(self.key_type)
-            if len(value_types) != 1:
-                raise JsonTypeError(
-                    f"type `{self.container_type}` has invalid key type, "
-                    f"enumerations must have a consistent member value type but several types found: {value_types}"
-                )
-            value_type = value_types.pop()
-            if value_type is not str:
-                raise JsonTypeError(
-                    f"`type `{self.container_type}` has invalid enumeration key type, expected `enum.Enum` with string values"
-                )
-            return
-
-        raise JsonTypeError(
-            f"`type `{self.container_type}` has invalid key type, expected `str` or `enum.Enum` with string values"
-        )
-
-    @property
-    def container_type(self) -> str:
-        key_type_name = python_type_to_str(self.key_type)
-        value_type_name = python_type_to_str(self.value_type)
-        return f"Dict[{key_type_name}, {value_type_name}]"
-
-    def parse(self, data: JsonType) -> dict[K, V]:
-        if not isinstance(data, dict):
-            raise JsonTypeError(
-                f"`type `{self.container_type}` expects JSON `object` data but instead received: {data}"
-            )
-
-        return dict(
-            (self.key_type(key), self.value_parser.parse(value))  # type: ignore[call-arg]
-            for key, value in data.items()
-        )
-
-
-class SetDeserializer(Deserializer[set[T]]):
-    "Recursively de-serializes a JSON list into a Python `set`."
-
-    member_type: type[T]
-    member_parser: Deserializer
-
-    def __init__(self, member_type: type[T]) -> None:
-        self.member_type = member_type
-
-    def build(self, context: ModuleType | None) -> None:
-        self.member_parser = _get_deserializer(self.member_type, context)
-
-    def parse(self, data: JsonType) -> set[T]:
-        if not isinstance(data, list):
-            type_name = python_type_to_str(self.member_type)
-            raise JsonTypeError(f"type `Set[{type_name}]` expects JSON `array` data but instead received: {data}")
-
-        return set(self.member_parser.parse(item) for item in data)
-
-
-class TupleDeserializer(Deserializer[tuple[Any, ...]]):
-    "Recursively de-serializes a JSON list into a Python `tuple`."
-
-    item_types: tuple[type[Any], ...]
-    item_parsers: tuple[Deserializer[Any], ...]
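    # [Editor's sketch, hedged] Heterogeneous tuples check arity as well as
    # per-position element types:
    #
    #   >>> create_deserializer(tuple[str, int]).parse(["a", 1])
    #   ('a', 1)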
- - def __init__(self, item_types: tuple[type[Any], ...]) -> None: - self.item_types = item_types - - def build(self, context: ModuleType | None) -> None: - self.item_parsers = tuple(_get_deserializer(item_type, context) for item_type in self.item_types) - - @property - def container_type(self) -> str: - type_names = ", ".join(python_type_to_str(item_type) for item_type in self.item_types) - return f"Tuple[{type_names}]" - - def parse(self, data: JsonType) -> tuple[Any, ...]: - if not isinstance(data, list) or len(data) != len(self.item_parsers): - if not isinstance(data, list): - raise JsonTypeError( - f"type `{self.container_type}` expects JSON `array` data but instead received: {data}" - ) - else: - count = len(self.item_parsers) - raise JsonValueError( - f"type `{self.container_type}` expects a JSON `array` of length {count} but received length {len(data)}" - ) - - return tuple(item_parser.parse(item) for item_parser, item in zip(self.item_parsers, data, strict=False)) - - -class UnionDeserializer(Deserializer): - "De-serializes a JSON value (of any type) into a Python union type." - - member_types: tuple[type, ...] - member_parsers: tuple[Deserializer, ...] - - def __init__(self, member_types: tuple[type, ...]) -> None: - self.member_types = member_types - - def build(self, context: ModuleType | None) -> None: - self.member_parsers = tuple(_get_deserializer(member_type, context) for member_type in self.member_types) - - def parse(self, data: JsonType) -> Any: - for member_parser in self.member_parsers: - # iterate over potential types of discriminated union - try: - return member_parser.parse(data) - except (JsonKeyError, JsonTypeError): - # indicates a required field is missing from JSON dict -OR- the data cannot be cast to the expected type, - # i.e. we don't have the type that we are looking for - continue - - type_names = ", ".join(python_type_to_str(member_type) for member_type in self.member_types) - raise JsonKeyError(f"type `Union[{type_names}]` could not be instantiated from: {data}") - - -def get_literal_properties(typ: type) -> set[str]: - "Returns the names of all properties in a class that are of a literal type." - - return set( - property_name for property_name, property_type in get_class_properties(typ) if is_type_literal(property_type) - ) - - -def get_discriminating_properties(types: tuple[type, ...]) -> set[str]: - "Returns a set of properties with literal type that are common across all specified classes." - - if not types or not all(isinstance(typ, type) for typ in types): - return set() - - props = get_literal_properties(types[0]) - for typ in types[1:]: - props = props & get_literal_properties(typ) - - return props - - -class TaggedUnionDeserializer(Deserializer): - "De-serializes a JSON value with one or more disambiguating properties into a Python union type." - - member_types: tuple[type, ...] 
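    # [Editor's sketch; `Cat` and `Dog` are hypothetical dataclasses.] A shared
    # `Literal`-typed field acts as the discriminator, so no trial parsing of
    # each member is needed:
    #
    #   @dataclass
    #   class Cat:
    #       kind: Literal["cat"]
    #
    #   @dataclass
    #   class Dog:
    #       kind: Literal["dog"]
    #
    #   create_deserializer(Union[Cat, Dog]).parse({"kind": "dog"})  # -> Dog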
- disambiguating_properties: set[str] - member_parsers: dict[tuple[str, Any], Deserializer] - - def __init__(self, member_types: tuple[type, ...]) -> None: - self.member_types = member_types - self.disambiguating_properties = get_discriminating_properties(member_types) - - def build(self, context: ModuleType | None) -> None: - self.member_parsers = {} - for member_type in self.member_types: - for property_name in self.disambiguating_properties: - literal_type = get_class_property(member_type, property_name) - if not literal_type: - continue - - for literal_value in unwrap_literal_values(literal_type): - tpl = (property_name, literal_value) - if tpl in self.member_parsers: - raise JsonTypeError( - f"disambiguating property `{property_name}` in type `{self.union_type}` has a duplicate value: {literal_value}" - ) - - self.member_parsers[tpl] = _get_deserializer(member_type, context) - - @property - def union_type(self) -> str: - type_names = ", ".join(python_type_to_str(member_type) for member_type in self.member_types) - return f"Union[{type_names}]" - - def parse(self, data: JsonType) -> Any: - if not isinstance(data, dict): - raise JsonTypeError( - f"tagged union type `{self.union_type}` expects JSON `object` data but instead received: {data}" - ) - - for property_name in self.disambiguating_properties: - disambiguating_value = data.get(property_name) - if disambiguating_value is None: - continue - - member_parser = self.member_parsers.get((property_name, disambiguating_value)) - if member_parser is None: - raise JsonTypeError( - f"disambiguating property value is invalid for tagged union type `{self.union_type}`: {data}" - ) - - return member_parser.parse(data) - - raise JsonTypeError( - f"disambiguating property value is missing for tagged union type `{self.union_type}`: {data}" - ) - - -class LiteralDeserializer(Deserializer): - "De-serializes a JSON value into a Python literal type." - - values: tuple[Any, ...] - parser: Deserializer - - def __init__(self, values: tuple[Any, ...]) -> None: - self.values = values - - def build(self, context: ModuleType | None) -> None: - literal_type_tuple = tuple(type(value) for value in self.values) - literal_type_set = set(literal_type_tuple) - if len(literal_type_set) != 1: - value_names = ", ".join(repr(value) for value in self.values) - raise TypeError( - f"type `Literal[{value_names}]` expects consistent literal value types but got: {literal_type_tuple}" - ) - - literal_type = literal_type_set.pop() - self.parser = _get_deserializer(literal_type, context) - - def parse(self, data: JsonType) -> Any: - value = self.parser.parse(data) - if value not in self.values: - value_names = ", ".join(repr(value) for value in self.values) - raise JsonTypeError(f"type `Literal[{value_names}]` could not be instantiated from: {data}") - return value - - -class EnumDeserializer(Deserializer[E]): - "Returns an enumeration instance based on the enumeration value read from a JSON value." - - enum_type: type[E] - - def __init__(self, enum_type: type[E]) -> None: - self.enum_type = enum_type - - def parse(self, data: JsonType) -> E: - return self.enum_type(data) - - -class CustomDeserializer(Deserializer[T]): - "Uses the `from_json` class method in class to de-serialize the object from JSON." 
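    # [Editor's sketch; `Point` is a hypothetical class.] Any class exposing a
    # callable `from_json` bypasses reflection-based parsing entirely:
    #
    #   class Point:
    #       def __init__(self, x: float, y: float) -> None:
    #           self.x, self.y = x, y
    #
    #       @classmethod
    #       def from_json(cls, data: JsonType) -> "Point":
    #           return cls(data["x"], data["y"])
    #
    #   create_deserializer(Point).parse({"x": 1.0, "y": 2.0})  # uses from_json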
- - converter: Callable[[JsonType], T] - - def __init__(self, converter: Callable[[JsonType], T]) -> None: - self.converter = converter - - def parse(self, data: JsonType) -> T: - return self.converter(data) - - -class FieldDeserializer(abc.ABC, Generic[T, R]): - """ - Deserializes a JSON property into a Python object field. - - :param property_name: The name of the JSON property to read from a JSON `object`. - :param field_name: The name of the field in a Python class to write data to. - :param parser: A compatible deserializer that can handle the field's type. - """ - - property_name: str - field_name: str - parser: Deserializer[T] - - def __init__(self, property_name: str, field_name: str, parser: Deserializer[T]) -> None: - self.property_name = property_name - self.field_name = field_name - self.parser = parser - - @abc.abstractmethod - def parse_field(self, data: dict[str, JsonType]) -> R: ... - - -class RequiredFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into a mandatory Python object field." - - def parse_field(self, data: dict[str, JsonType]) -> T: - if self.property_name not in data: - raise JsonKeyError(f"missing required property `{self.property_name}` from JSON object: {data}") - - return self.parser.parse(data[self.property_name]) - - -class OptionalFieldDeserializer(FieldDeserializer[T, Optional[T]]): - "Deserializes a JSON property into an optional Python object field with a default value of `None`." - - def parse_field(self, data: dict[str, JsonType]) -> T | None: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return None - - -class DefaultFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into a Python object field with an explicit default value." - - default_value: T - - def __init__( - self, - property_name: str, - field_name: str, - parser: Deserializer, - default_value: T, - ) -> None: - super().__init__(property_name, field_name, parser) - self.default_value = default_value - - def parse_field(self, data: dict[str, JsonType]) -> T: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return self.default_value - - -class DefaultFactoryFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into an optional Python object field with an explicit default value factory." - - default_factory: Callable[[], T] - - def __init__( - self, - property_name: str, - field_name: str, - parser: Deserializer[T], - default_factory: Callable[[], T], - ) -> None: - super().__init__(property_name, field_name, parser) - self.default_factory = default_factory - - def parse_field(self, data: dict[str, JsonType]) -> T: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return self.default_factory() - - -class ClassDeserializer(Deserializer[T]): - "Base class for de-serializing class-like types such as data classes, named tuples and regular classes." 
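    # [Editor's note, illustrative] `parse` below is strict in both directions:
    # a missing required property raises JsonKeyError, and so does any JSON
    # property that matches no declared field (e.g. {"x": 1, "typo": 2} against
    # a class whose only field is `x`), so misspellings fail fast instead of
    # being silently dropped.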
- - class_type: type - property_parsers: list[FieldDeserializer] - property_fields: set[str] - - def __init__(self, class_type: type[T]) -> None: - self.class_type = class_type - - def assign(self, property_parsers: list[FieldDeserializer]) -> None: - self.property_parsers = property_parsers - self.property_fields = set(property_parser.property_name for property_parser in property_parsers) - - def parse(self, data: JsonType) -> T: - if not isinstance(data, dict): - type_name = python_type_to_str(self.class_type) - raise JsonTypeError(f"`type `{type_name}` expects JSON `object` data but instead received: {data}") - - object_data: dict[str, JsonType] = typing.cast(dict[str, JsonType], data) - - field_values = {} - for property_parser in self.property_parsers: - field_values[property_parser.field_name] = property_parser.parse_field(object_data) - - if not self.property_fields.issuperset(object_data): - unassigned_names = [name for name in object_data if name not in self.property_fields] - raise JsonKeyError(f"unrecognized fields in JSON object: {unassigned_names}") - - return self.create(**field_values) - - def create(self, **field_values: Any) -> T: - "Instantiates an object with a collection of property values." - - obj: T = create_object(self.class_type) - - # use `setattr` on newly created object instance - for field_name, field_value in field_values.items(): - setattr(obj, field_name, field_value) - return obj - - -class NamedTupleDeserializer(ClassDeserializer[NamedTuple]): - "De-serializes a named tuple from a JSON `object`." - - def build(self, context: ModuleType | None) -> None: - property_parsers: list[FieldDeserializer] = [ - RequiredFieldDeserializer(field_name, field_name, _get_deserializer(field_type, context)) - for field_name, field_type in get_resolved_hints(self.class_type).items() - ] - super().assign(property_parsers) - - def create(self, **field_values: Any) -> NamedTuple: - # mypy fails to deduce that this class returns NamedTuples only, hence the `ignore` directive - return self.class_type(**field_values) # type: ignore[no-any-return] - - -class DataclassDeserializer(ClassDeserializer[T]): - "De-serializes a data class from a JSON `object`." 
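    # [Editor's sketch; `Job` is a hypothetical dataclass.] Required, optional
    # and defaulted fields map onto the specialized field deserializers above:
    #
    #   @dataclass
    #   class Job:
    #       name: str               # -> RequiredFieldDeserializer
    #       tags: list[str] | None  # -> OptionalFieldDeserializer
    #       retries: int = 3        # -> DefaultFieldDeserializer
    #
    #   create_deserializer(Job).parse({"name": "build"})
    #   # -> Job(name='build', tags=None, retries=3)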
- - def __init__(self, class_type: type[T]) -> None: - if not dataclasses.is_dataclass(class_type): - raise TypeError("expected: data-class type") - super().__init__(class_type) # type: ignore[arg-type] - - def build(self, context: ModuleType | None) -> None: - property_parsers: list[FieldDeserializer] = [] - resolved_hints = get_resolved_hints(self.class_type) - for field in dataclasses.fields(self.class_type): - field_type = resolved_hints[field.name] - property_name = python_field_to_json_property(field.name, field_type) - - is_optional = is_type_optional(field_type) - has_default = field.default is not dataclasses.MISSING - has_default_factory = field.default_factory is not dataclasses.MISSING - - if is_optional: - required_type: type[T] = unwrap_optional_type(field_type) - else: - required_type = field_type - - parser = _get_deserializer(required_type, context) - - if has_default: - field_parser: FieldDeserializer = DefaultFieldDeserializer( - property_name, field.name, parser, field.default - ) - elif has_default_factory: - default_factory = typing.cast(Callable[[], Any], field.default_factory) - field_parser = DefaultFactoryFieldDeserializer(property_name, field.name, parser, default_factory) - elif is_optional: - field_parser = OptionalFieldDeserializer(property_name, field.name, parser) - else: - field_parser = RequiredFieldDeserializer(property_name, field.name, parser) - - property_parsers.append(field_parser) - - super().assign(property_parsers) - - -class FrozenDataclassDeserializer(DataclassDeserializer[T]): - "De-serializes a frozen data class from a JSON `object`." - - def create(self, **field_values: Any) -> T: - "Instantiates an object with a collection of property values." - - # create object instance without calling `__init__` - obj: T = create_object(self.class_type) - - # can't use `setattr` on frozen dataclasses, pass member variable values to `__init__` - obj.__init__(**field_values) # type: ignore - return obj - - -class TypedClassDeserializer(ClassDeserializer[T]): - "De-serializes a class with type annotations from a JSON `object` by iterating over class properties." - - def build(self, context: ModuleType | None) -> None: - property_parsers: list[FieldDeserializer] = [] - for field_name, field_type in get_resolved_hints(self.class_type).items(): - property_name = python_field_to_json_property(field_name, field_type) - - is_optional = is_type_optional(field_type) - - if is_optional: - required_type: type[T] = unwrap_optional_type(field_type) - else: - required_type = field_type - - parser = _get_deserializer(required_type, context) - - if is_optional: - field_parser: FieldDeserializer = OptionalFieldDeserializer(property_name, field_name, parser) - else: - field_parser = RequiredFieldDeserializer(property_name, field_name, parser) - - property_parsers.append(field_parser) - - super().assign(property_parsers) - - -def create_deserializer(typ: TypeLike, context: ModuleType | None = None) -> Deserializer: - """ - Creates a de-serializer engine to produce a Python object from an object obtained from a JSON string. - - When de-serializing a JSON object into a Python object, the following transformations are applied: - - * Fundamental types are parsed as `bool`, `int`, `float` or `str`. - * Date and time types are parsed from the ISO 8601 format with time zone into the corresponding Python type - `datetime`, `date` or `time`. - * Byte arrays are read from a string with Base64 encoding into a `bytes` instance. 
- * UUIDs are extracted from a UUID string compliant with RFC 4122 into a `uuid.UUID` instance. - * Enumerations are instantiated with a lookup on enumeration value. - * Containers (e.g. `list`, `dict`, `set`, `tuple`) are parsed recursively. - * Complex objects with properties (including data class types) are populated from dictionaries of key-value pairs - using reflection (enumerating type annotations). - - :raises TypeError: A de-serializer engine cannot be constructed for the input type. - """ - - if context is None: - if isinstance(typ, type): - context = sys.modules[typ.__module__] - - return _get_deserializer(typ, context) - - -_CACHE: dict[tuple[str, str], Deserializer] = {} - - -def _get_deserializer(typ: TypeLike, context: ModuleType | None) -> Deserializer: - "Creates or re-uses a de-serializer engine to parse an object obtained from a JSON string." - - cache_key = None - - if isinstance(typ, (str, typing.ForwardRef)): - if context is None: - raise TypeError(f"missing context for evaluating type: {typ}") - - if isinstance(typ, str): - if hasattr(context, typ): - cache_key = (context.__name__, typ) - elif isinstance(typ, typing.ForwardRef): - if hasattr(context, typ.__forward_arg__): - cache_key = (context.__name__, typ.__forward_arg__) - - typ = evaluate_type(typ, context) - - typ = unwrap_annotated_type(typ) if is_type_annotated(typ) else typ - - if isinstance(typ, type) and typing.get_origin(typ) is None: - cache_key = (typ.__module__, typ.__name__) - - if cache_key is not None: - deserializer = _CACHE.get(cache_key) - if deserializer is None: - deserializer = _create_deserializer(typ) - - # store de-serializer immediately in cache to avoid stack overflow for recursive types - _CACHE[cache_key] = deserializer - - if isinstance(typ, type): - # use type's own module as context for evaluating member types - context = sys.modules[typ.__module__] - - # create any de-serializers this de-serializer is depending on - deserializer.build(context) - else: - # special forms are not always hashable, create a new de-serializer every time - deserializer = _create_deserializer(typ) - deserializer.build(context) - - return deserializer - - -def _create_deserializer(typ: TypeLike) -> Deserializer: - "Creates a de-serializer engine to parse an object obtained from a JSON string." - - # check for well-known types - if typ is type(None): - return NoneDeserializer() - elif typ is bool: - return BoolDeserializer() - elif typ is int: - return IntDeserializer() - elif typ is float: - return FloatDeserializer() - elif typ is str: - return StringDeserializer() - elif typ is bytes: - return BytesDeserializer() - elif typ is datetime.datetime: - return DateTimeDeserializer() - elif typ is datetime.date: - return DateDeserializer() - elif typ is datetime.time: - return TimeDeserializer() - elif typ is uuid.UUID: - return UUIDDeserializer() - elif typ is ipaddress.IPv4Address: - return IPv4Deserializer() - elif typ is ipaddress.IPv6Address: - return IPv6Deserializer() - - # dynamically-typed collection types - if typ is list: - raise TypeError("explicit item type required: use `List[T]` instead of `list`") - if typ is dict: - raise TypeError("explicit key and value types required: use `Dict[K, V]` instead of `dict`") - if typ is set: - raise TypeError("explicit member type required: use `Set[T]` instead of `set`") - if typ is tuple: - raise TypeError("explicit item type list required: use `Tuple[T, ...]` instead of `tuple`") - - # generic types (e.g. list, dict, set, etc.) 
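    # [Editor's note, illustrative] From here on, dispatch is driven by
    # typing.get_origin / typing.get_args, so nested parameterized aliases
    # resolve to the recursive deserializers defined above, e.g.:
    #
    #   create_deserializer(dict[str, list[int]]).parse({"a": [1, 2]})
    #   # -> {'a': [1, 2]}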
- origin_type = typing.get_origin(typ) - if origin_type is list: - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - return ListDeserializer(list_item_type) - elif origin_type is dict: - key_type, value_type = typing.get_args(typ) - return DictDeserializer(key_type, value_type) - elif origin_type is set: - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - return SetDeserializer(set_member_type) - elif origin_type is tuple: - return TupleDeserializer(typing.get_args(typ)) - elif origin_type is Union: - union_args = typing.get_args(typ) - if get_discriminating_properties(union_args): - return TaggedUnionDeserializer(union_args) - else: - return UnionDeserializer(union_args) - elif origin_type is Literal: - return LiteralDeserializer(typing.get_args(typ)) - - if not inspect.isclass(typ): - if is_dataclass_instance(typ): - raise TypeError(f"dataclass type expected but got instance: {typ}") - else: - raise TypeError(f"unable to de-serialize unrecognized type: {typ}") - - if issubclass(typ, enum.Enum): - return EnumDeserializer(typ) - - if is_named_tuple_type(typ): - return NamedTupleDeserializer(typ) - - # check if object has custom serialization method - convert_func = getattr(typ, "from_json", None) - if callable(convert_func): - return CustomDeserializer(convert_func) - - if is_dataclass_type(typ): - dataclass_params = getattr(typ, "__dataclass_params__", None) - if dataclass_params is not None and dataclass_params.frozen: - return FrozenDataclassDeserializer(typ) - else: - return DataclassDeserializer(typ) - - return TypedClassDeserializer(typ) diff --git a/src/llama_stack_api/strong_typing/docstring.py b/src/llama_stack_api/strong_typing/docstring.py deleted file mode 100644 index 4c9ea49e5..000000000 --- a/src/llama_stack_api/strong_typing/docstring.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import builtins -import collections.abc -import dataclasses -import inspect -import re -import sys -import types -import typing -from collections.abc import Callable -from dataclasses import dataclass -from io import StringIO -from typing import Any, Protocol, TypeVar - -if sys.version_info >= (3, 10): - from typing import TypeGuard -else: - from typing import TypeGuard - -from .inspection import ( - DataclassInstance, - get_class_properties, - get_signature, - is_dataclass_type, - is_type_enum, -) - -T = TypeVar("T") - - -@dataclass -class DocstringParam: - """ - A parameter declaration in a parameter block. - - :param name: The name of the parameter. - :param description: The description text for the parameter. - """ - - name: str - description: str - param_type: type | str = inspect.Signature.empty - - def __str__(self) -> str: - return f":param {self.name}: {self.description}" - - -@dataclass -class DocstringReturns: - """ - A `returns` declaration extracted from a docstring. - - :param description: The description text for the return value. - """ - - description: str - return_type: type = inspect.Signature.empty - - def __str__(self) -> str: - return f":returns: {self.description}" - - -@dataclass -class DocstringRaises: - """ - A `raises` declaration extracted from a docstring. - - :param typename: The type name of the exception raised. 
- :param description: The description associated with the exception raised. - """ - - typename: str - description: str - raise_type: type = inspect.Signature.empty - - def __str__(self) -> str: - return f":raises {self.typename}: {self.description}" - - -@dataclass -class Docstring: - """ - Represents the documentation string (a.k.a. docstring) for a type such as a (data) class or function. - - A docstring is broken down into the following components: - * A short description, which is the first block of text in the documentation string, and ends with a double - newline or a parameter block. - * A long description, which is the optional block of text following the short description, and ends with - a parameter block. - * A parameter block of named parameter and description string pairs in ReST-style. - * A `returns` declaration, which adds explanation to the return value. - * A `raises` declaration, which adds explanation to the exception type raised by the function on error. - - When the docstring is attached to a data class, it is understood as the documentation string of the class - `__init__` method. - - :param short_description: The short description text parsed from a docstring. - :param long_description: The long description text parsed from a docstring. - :param params: The parameter block extracted from a docstring. - :param returns: The returns declaration extracted from a docstring. - """ - - short_description: str | None = None - long_description: str | None = None - params: dict[str, DocstringParam] = dataclasses.field(default_factory=dict) - returns: DocstringReturns | None = None - raises: dict[str, DocstringRaises] = dataclasses.field(default_factory=dict) - - @property - def full_description(self) -> str | None: - if self.short_description and self.long_description: - return f"{self.short_description}\n\n{self.long_description}" - elif self.short_description: - return self.short_description - else: - return None - - def __str__(self) -> str: - output = StringIO() - - has_description = self.short_description or self.long_description - has_blocks = self.params or self.returns or self.raises - - if has_description: - if self.short_description and self.long_description: - output.write(self.short_description) - output.write("\n\n") - output.write(self.long_description) - elif self.short_description: - output.write(self.short_description) - - if has_blocks: - if has_description: - output.write("\n") - - for param in self.params.values(): - output.write("\n") - output.write(str(param)) - if self.returns: - output.write("\n") - output.write(str(self.returns)) - for raises in self.raises.values(): - output.write("\n") - output.write(str(raises)) - - s = output.getvalue() - output.close() - return s - - -def is_exception(member: object) -> TypeGuard[type[BaseException]]: - return isinstance(member, type) and issubclass(member, BaseException) - - -def get_exceptions(module: types.ModuleType) -> dict[str, type[BaseException]]: - "Returns all exception classes declared in a module." - - return {name: class_type for name, class_type in inspect.getmembers(module, is_exception)} - - -class SupportsDoc(Protocol): - __doc__: str | None - - -def _maybe_unwrap_async_iterator(t): - origin_type = typing.get_origin(t) - if origin_type is collections.abc.AsyncIterator: - return typing.get_args(t)[0] - return t - - -def parse_type(typ: SupportsDoc) -> Docstring: - """ - Parse the docstring of a type into its components. - - :param typ: The type whose documentation string to parse. 
- :returns: Components of the documentation string. - """ - # Use docstring from the iterator origin type for streaming apis - typ = _maybe_unwrap_async_iterator(typ) - - doc = get_docstring(typ) - if doc is None: - return Docstring() - - docstring = parse_text(doc) - check_docstring(typ, docstring) - - # assign parameter and return types - if is_dataclass_type(typ): - properties = dict(get_class_properties(typing.cast(type, typ))) - - for name, param in docstring.params.items(): - param.param_type = properties[name] - - elif inspect.isfunction(typ): - signature = get_signature(typ) - for name, param in docstring.params.items(): - param.param_type = signature.parameters[name].annotation - if docstring.returns: - docstring.returns.return_type = signature.return_annotation - - # assign exception types - defining_module = inspect.getmodule(typ) - if defining_module: - context: dict[str, type] = {} - context.update(get_exceptions(builtins)) - context.update(get_exceptions(defining_module)) - for exc_name, exc in docstring.raises.items(): - raise_type = context.get(exc_name) - if raise_type is None: - type_name = getattr(typ, "__qualname__", None) or getattr(typ, "__name__", None) or None - raise TypeError( - f"doc-string exception type `{exc_name}` is not an exception defined in the context of `{type_name}`" - ) - - exc.raise_type = raise_type - - return docstring - - -def parse_text(text: str) -> Docstring: - """ - Parse a ReST-style docstring into its components. - - :param text: The documentation string to parse, typically acquired as `type.__doc__`. - :returns: Components of the documentation string. - """ - - if not text: - return Docstring() - - # find block that starts object metadata block (e.g. `:param p:` or `:returns:`) - text = inspect.cleandoc(text) - match = re.search("^:", text, flags=re.MULTILINE) - if match: - desc_chunk = text[: match.start()] - meta_chunk = text[match.start() :] # noqa: E203 - else: - desc_chunk = text - meta_chunk = "" - - # split description text into short and long description - parts = desc_chunk.split("\n\n", 1) - - # ensure short description has no newlines - short_description = parts[0].strip().replace("\n", " ") or None - - # ensure long description preserves its structure (e.g. preformatted text) - if len(parts) > 1: - long_description = parts[1].strip() or None - else: - long_description = None - - params: dict[str, DocstringParam] = {} - raises: dict[str, DocstringRaises] = {} - returns = None - for match in re.finditer(r"(^:.*?)(?=^:|\Z)", meta_chunk, flags=re.DOTALL | re.MULTILINE): - chunk = match.group(0) - if not chunk: - continue - - args_chunk, desc_chunk = chunk.lstrip(":").split(":", 1) - args = args_chunk.split() - desc = re.sub(r"\s+", " ", desc_chunk.strip()) - - if len(args) > 0: - kw = args[0] - if len(args) == 2: - if kw == "param": - params[args[1]] = DocstringParam( - name=args[1], - description=desc, - ) - elif kw == "raise" or kw == "raises": - raises[args[1]] = DocstringRaises( - typename=args[1], - description=desc, - ) - - elif len(args) == 1: - if kw == "return" or kw == "returns": - returns = DocstringReturns(description=desc) - - return Docstring( - long_description=long_description, - short_description=short_description, - params=params, - returns=returns, - raises=raises, - ) - - -def has_default_docstring(typ: SupportsDoc) -> bool: - "Check if class has the auto-generated string assigned by @dataclass." 
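    # [Editor's sketch, hedged] For context, `parse_text` above splits a
    # ReST-style docstring into its components:
    #
    #   >>> ds = parse_text("Add numbers.\n\n:param a: first operand")
    #   >>> ds.short_description
    #   'Add numbers.'
    #   >>> list(ds.params)
    #   ['a']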
- - if not isinstance(typ, type): - return False - - if is_dataclass_type(typ): - return typ.__doc__ is not None and re.match(f"^{re.escape(typ.__name__)}[(].*[)]$", typ.__doc__) is not None - - if is_type_enum(typ): - return typ.__doc__ is not None and typ.__doc__ == "An enumeration." - - return False - - -def has_docstring(typ: SupportsDoc) -> bool: - "Check if class has a documentation string other than the auto-generated string assigned by @dataclass." - - if has_default_docstring(typ): - return False - - return bool(typ.__doc__) - - -def get_docstring(typ: SupportsDoc) -> str | None: - if typ.__doc__ is None: - return None - - if has_default_docstring(typ): - return None - - return typ.__doc__ - - -def check_docstring(typ: SupportsDoc, docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a type. - - :raises TypeError: Raised on a mismatch between doc-string parameters, and function or type signature. - """ - - if is_dataclass_type(typ): - check_dataclass_docstring(typ, docstring, strict) - elif inspect.isfunction(typ): - check_function_docstring(typ, docstring, strict) - - -def check_dataclass_docstring(typ: type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a data-class type. - - :param strict: Whether to check if all data-class members have doc-strings. - :raises TypeError: Raised on a mismatch between doc-string parameters and data-class members. - """ - - if not is_dataclass_type(typ): - raise TypeError("not a data-class type") - - properties = dict(get_class_properties(typ)) - class_name = typ.__name__ - - for name in docstring.params: - if name not in properties: - raise TypeError(f"doc-string parameter `{name}` is not a member of the data-class `{class_name}`") - - if not strict: - return - - for name in properties: - if name not in docstring.params: - raise TypeError(f"member `{name}` in data-class `{class_name}` is missing its doc-string") - - -def check_function_docstring(fn: Callable[..., Any], docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a function or member function. - - :param strict: Whether to check if all function parameters and the return type have doc-strings. - :raises TypeError: Raised on a mismatch between doc-string parameters and function signature. 
- """ - - signature = get_signature(fn) - func_name = fn.__qualname__ - - for name in docstring.params: - if name not in signature.parameters: - raise TypeError(f"doc-string parameter `{name}` is absent from signature of function `{func_name}`") - - if docstring.returns is not None and signature.return_annotation is inspect.Signature.empty: - raise TypeError(f"doc-string has returns description in function `{func_name}` with no return type annotation") - - if not strict: - return - - for name, param in signature.parameters.items(): - # ignore `self` in member function signatures - if name == "self" and ( - param.kind is inspect.Parameter.POSITIONAL_ONLY or param.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD - ): - continue - - if name not in docstring.params: - raise TypeError(f"function parameter `{name}` in `{func_name}` is missing its doc-string") - - if signature.return_annotation is not inspect.Signature.empty and docstring.returns is None: - raise TypeError(f"function `{func_name}` has no returns description in its doc-string") diff --git a/src/llama_stack_api/strong_typing/exception.py b/src/llama_stack_api/strong_typing/exception.py deleted file mode 100644 index af037cc3c..000000000 --- a/src/llama_stack_api/strong_typing/exception.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - - -class JsonKeyError(Exception): - "Raised when deserialization for a class or union type has failed because a matching member was not found." - - -class JsonValueError(Exception): - "Raised when (de)serialization of data has failed due to invalid value." - - -class JsonTypeError(Exception): - "Raised when deserialization of data has failed due to a type mismatch." diff --git a/src/llama_stack_api/strong_typing/inspection.py b/src/llama_stack_api/strong_typing/inspection.py deleted file mode 100644 index 319d12657..000000000 --- a/src/llama_stack_api/strong_typing/inspection.py +++ /dev/null @@ -1,1104 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import dataclasses -import datetime -import enum -import importlib -import importlib.machinery -import importlib.util -import inspect -import re -import sys -import types -import typing -import uuid -from collections.abc import Callable, Iterable -from typing import ( - Any, - Literal, - NamedTuple, - Protocol, - TypeVar, - Union, - runtime_checkable, -) - -if sys.version_info >= (3, 9): - from typing import Annotated -else: - from typing import Annotated - -if sys.version_info >= (3, 10): - from typing import TypeGuard -else: - from typing import TypeGuard - - -from pydantic import BaseModel -from pydantic.fields import FieldInfo - -S = TypeVar("S") -T = TypeVar("T") -K = TypeVar("K") -V = TypeVar("V") - - -def _is_type_like(data_type: object) -> bool: - """ - Checks if the object is a type or type-like object (e.g. generic type). - - :param data_type: The object to validate. - :returns: True if the object is a type or type-like object. 
- """ - - if isinstance(data_type, type): - # a standard type - return True - elif typing.get_origin(data_type) is not None: - # a generic type such as `list`, `dict` or `set` - return True - elif hasattr(data_type, "__forward_arg__"): - # an instance of `ForwardRef` - return True - elif data_type is Any: - # the special form `Any` - return True - else: - return False - - -if sys.version_info >= (3, 9): - TypeLike = Union[type, types.GenericAlias, typing.ForwardRef, Any] - - def is_type_like( - data_type: object, - ) -> TypeGuard[TypeLike]: - """ - Checks if the object is a type or type-like object (e.g. generic type). - - :param data_type: The object to validate. - :returns: True if the object is a type or type-like object. - """ - - return _is_type_like(data_type) - -else: - TypeLike = object - - def is_type_like( - data_type: object, - ) -> bool: - return _is_type_like(data_type) - - -def evaluate_member_type(typ: Any, cls: type) -> Any: - """ - Evaluates a forward reference type in a dataclass member. - - :param typ: The dataclass member type to convert. - :param cls: The dataclass in which the member is defined. - :returns: The evaluated type. - """ - - return evaluate_type(typ, sys.modules[cls.__module__]) - - -def evaluate_type(typ: Any, module: types.ModuleType) -> Any: - """ - Evaluates a forward reference type. - - :param typ: The type to convert, typically a dataclass member type. - :param module: The context for the type, i.e. the module in which the member is defined. - :returns: The evaluated type. - """ - - if isinstance(typ, str): - # evaluate data-class field whose type annotation is a string - return eval(typ, module.__dict__, locals()) - if isinstance(typ, typing.ForwardRef): - if sys.version_info >= (3, 9): - return typ._evaluate(module.__dict__, locals(), recursive_guard=frozenset()) - else: - return typ._evaluate(module.__dict__, locals()) - else: - return typ - - -@runtime_checkable -class DataclassInstance(Protocol): - __dataclass_fields__: typing.ClassVar[dict[str, dataclasses.Field]] - - -def is_dataclass_type(typ: Any) -> TypeGuard[type[DataclassInstance]]: - "True if the argument corresponds to a data class type (but not an instance)." - - typ = unwrap_annotated_type(typ) - return isinstance(typ, type) and dataclasses.is_dataclass(typ) - - -def is_dataclass_instance(obj: Any) -> TypeGuard[DataclassInstance]: - "True if the argument corresponds to a data class instance (but not a type)." - - return not isinstance(obj, type) and dataclasses.is_dataclass(obj) - - -@dataclasses.dataclass -class DataclassField: - name: str - type: Any - default: Any - - def __init__(self, name: str, type: Any, default: Any = dataclasses.MISSING) -> None: - self.name = name - self.type = type - self.default = default - - -def dataclass_fields(cls: type[DataclassInstance]) -> Iterable[DataclassField]: - "Generates the fields of a data-class resolving forward references." - - for field in dataclasses.fields(cls): - yield DataclassField(field.name, evaluate_member_type(field.type, cls), field.default) - - -def dataclass_field_by_name(cls: type[DataclassInstance], name: str) -> DataclassField: - "Looks up a field in a data-class by its field name." 
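    # [Editor's sketch; `Point` is hypothetical.] The lookup resolves string
    # and forward-reference annotations via `evaluate_member_type`:
    #
    #   @dataclasses.dataclass
    #   class Point:
    #       x: "int"
    #
    #   dataclass_field_by_name(Point, "x").type  # -> <class 'int'>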
- - for field in dataclasses.fields(cls): - if field.name == name: - return DataclassField(field.name, evaluate_member_type(field.type, cls)) - - raise LookupError(f"field `{name}` missing from class `{cls.__name__}`") - - -def is_named_tuple_instance(obj: Any) -> TypeGuard[NamedTuple]: - "True if the argument corresponds to a named tuple instance." - - return is_named_tuple_type(type(obj)) - - -def is_named_tuple_type(typ: Any) -> TypeGuard[type[NamedTuple]]: - """ - True if the argument corresponds to a named tuple type. - - Calling the function `collections.namedtuple` gives a new type that is a subclass of `tuple` (and no other classes) - with a member named `_fields` that is a tuple whose items are all strings. - """ - - if not isinstance(typ, type): - return False - - typ = unwrap_annotated_type(typ) - - b = getattr(typ, "__bases__", None) - if b is None: - return False - - if len(b) != 1 or b[0] != tuple: - return False - - f = getattr(typ, "_fields", None) - if not isinstance(f, tuple): - return False - - return all(isinstance(n, str) for n in f) - - -if sys.version_info >= (3, 11): - - def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]: - "True if the specified type is an enumeration type." - - typ = unwrap_annotated_type(typ) - return isinstance(typ, enum.EnumType) - -else: - - def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]: - "True if the specified type is an enumeration type." - - typ = unwrap_annotated_type(typ) - - # use an explicit isinstance(..., type) check to filter out special forms like generics - return isinstance(typ, type) and issubclass(typ, enum.Enum) - - -def enum_value_types(enum_type: type[enum.Enum]) -> list[type]: - """ - Returns all unique value types of the `enum.Enum` type in definition order. - """ - - # filter unique enumeration value types by keeping definition order - return list(dict.fromkeys(type(e.value) for e in enum_type)) - - -def extend_enum( - source: type[enum.Enum], -) -> Callable[[type[enum.Enum]], type[enum.Enum]]: - """ - Creates a new enumeration type extending the set of values in an existing type. - - :param source: The existing enumeration type to be extended with new values. - :returns: A new enumeration type with the extended set of values. - """ - - def wrap(extend: type[enum.Enum]) -> type[enum.Enum]: - # create new enumeration type combining the values from both types - values: dict[str, Any] = {} - values.update((e.name, e.value) for e in source) - values.update((e.name, e.value) for e in extend) - # mypy fails to determine that __name__ is always a string; hence the `ignore` directive. - enum_class: type[enum.Enum] = enum.Enum(extend.__name__, values) # type: ignore[misc] - - # assign the newly created type to the same module where the extending class is defined - enum_class.__module__ = extend.__module__ - enum_class.__doc__ = extend.__doc__ - setattr(sys.modules[extend.__module__], extend.__name__, enum_class) - - return enum.unique(enum_class) - - return wrap - - -if sys.version_info >= (3, 10): - - def _is_union_like(typ: object) -> bool: - "True if type is a union such as `Union[T1, T2, ...]` or a union type `T1 | T2`." - - return typing.get_origin(typ) is Union or isinstance(typ, types.UnionType) - -else: - - def _is_union_like(typ: object) -> bool: - "True if type is a union such as `Union[T1, T2, ...]` or a union type `T1 | T2`." 
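`extend_enum` above merges the members of two enumerations and rebuilds a fresh type through the functional `enum.Enum(...)` API. A small stdlib sketch of that mechanism (the `Color` names are illustrative):

```python
import enum


class Color(enum.Enum):
    RED = "red"
    BLUE = "blue"


# merge existing members with new ones, preserving definition order,
# then create a fresh type via the functional API -- as extend_enum() did
values = {e.name: e.value for e in Color}
values["GREEN"] = "green"
ExtendedColor = enum.Enum("ExtendedColor", values)

assert [e.name for e in ExtendedColor] == ["RED", "BLUE", "GREEN"]
assert ExtendedColor.RED.value == "red"
```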
-
-        return typing.get_origin(typ) is Union
-
-
-def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[type[Any | None]]:
-    """
-    True if the type annotation corresponds to an optional type (e.g. `Optional[T]` or `Union[T1,T2,None]`).
-
-    `Optional[T]` is represented as `Union[T, None]` in classic style, and is equivalent to `T | None` in new style.
-
-    :param strict: True if only `Optional[T]` qualifies as an optional type but `Union[T1, T2, None]` does not.
-    """
-
-    typ = unwrap_annotated_type(typ)
-
-    if _is_union_like(typ):
-        args = typing.get_args(typ)
-        if strict and len(args) != 2:
-            return False
-
-        return type(None) in args
-
-    return False
-
-
-def unwrap_optional_type(typ: type[T | None]) -> type[T]:
-    """
-    Extracts the inner type of an optional type.
-
-    :param typ: The optional type `Optional[T]`.
-    :returns: The inner type `T`.
-    """
-
-    return rewrap_annotated_type(_unwrap_optional_type, typ)
-
-
-def _unwrap_optional_type(typ: type[T | None]) -> type[T]:
-    "Extracts the type qualified as optional (e.g. returns `T` for `Optional[T]`)."
-
-    # Optional[T] is represented internally as Union[T, None]
-    if not _is_union_like(typ):
-        raise TypeError("optional type must have un-subscripted type of Union")
-
-    # will automatically unwrap Union[T] into T
-    return Union[tuple(filter(lambda item: item is not type(None), typing.get_args(typ)))]  # type: ignore[return-value]
-
-
-def is_type_union(typ: object) -> bool:
-    "True if the type annotation corresponds to a union type (e.g. `Union[T1,T2,T3]`)."
-
-    typ = unwrap_annotated_type(typ)
-    if _is_union_like(typ):
-        args = typing.get_args(typ)
-        return len(args) > 2 or type(None) not in args
-
-    return False
-
-
-def unwrap_union_types(typ: object) -> tuple[object, ...]:
-    """
-    Extracts the inner types of a union type.
-
-    :param typ: The union type `Union[T1, T2, ...]`.
-    :returns: The inner types `T1`, `T2`, etc.
-    """
-
-    typ = unwrap_annotated_type(typ)
-    return _unwrap_union_types(typ)
-
-
-def _unwrap_union_types(typ: object) -> tuple[object, ...]:
-    "Extracts the types in a union (e.g. returns a tuple of types `T1` and `T2` for `Union[T1, T2]`)."
-
-    if not _is_union_like(typ):
-        raise TypeError("union type must have un-subscripted type of Union")
-
-    return typing.get_args(typ)
-
-
-def is_type_literal(typ: object) -> bool:
-    "True if the specified type is a literal of one or more constant values, e.g. `Literal['string']` or `Literal[42]`."
-
-    typ = unwrap_annotated_type(typ)
-    return typing.get_origin(typ) is Literal
-
-
-def unwrap_literal_value(typ: object) -> Any:
-    """
-    Extracts the single constant value captured by a literal type.
-
-    :param typ: The literal type `Literal[value]`.
-    :returns: The value captured by the literal type.
-    """
-
-    args = unwrap_literal_values(typ)
-    if len(args) != 1:
-        raise TypeError("too many values in literal type")
-
-    return args[0]
-
-
-def unwrap_literal_values(typ: object) -> tuple[Any, ...]:
-    """
-    Extracts the constant values captured by a literal type.
-
-    :param typ: The literal type `Literal[value, ...]`.
-    :returns: A tuple of values captured by the literal type.
-    """
-
-    typ = unwrap_annotated_type(typ)
-    return typing.get_args(typ)
-
-
-def unwrap_literal_types(typ: object) -> tuple[type, ...]:
-    """
-    Extracts the types of the constant values captured by a literal type.
-
-    :param typ: The literal type `Literal[value, ...]`.
-    :returns: A tuple of item types `T` such that `type(value) == T`.
- """ - - return tuple(type(t) for t in unwrap_literal_values(typ)) - - -def is_generic_list(typ: object) -> TypeGuard[type[list]]: - "True if the specified type is a generic list, i.e. `List[T]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is list - - -def unwrap_generic_list(typ: type[list[T]]) -> type[T]: - """ - Extracts the item type of a list type. - - :param typ: The list type `List[T]`. - :returns: The item type `T`. - """ - - return rewrap_annotated_type(_unwrap_generic_list, typ) - - -def _unwrap_generic_list(typ: type[list[T]]) -> type[T]: - "Extracts the item type of a list type (e.g. returns `T` for `List[T]`)." - - (list_type,) = typing.get_args(typ) # unpack single tuple element - return list_type # type: ignore[no-any-return] - - -def is_generic_sequence(typ: object) -> bool: - "True if the specified type is a generic Sequence, i.e. `Sequence[T]`." - import collections.abc - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is collections.abc.Sequence - - -def unwrap_generic_sequence(typ: object) -> type: - """ - Extracts the item type of a Sequence type. - - :param typ: The Sequence type `Sequence[T]`. - :returns: The item type `T`. - """ - - return rewrap_annotated_type(_unwrap_generic_sequence, typ) # type: ignore[arg-type] - - -def _unwrap_generic_sequence(typ: object) -> type: - "Extracts the item type of a Sequence type (e.g. returns `T` for `Sequence[T]`)." - - (sequence_type,) = typing.get_args(typ) # unpack single tuple element - return sequence_type # type: ignore[no-any-return] - - -def is_generic_set(typ: object) -> TypeGuard[type[set]]: - "True if the specified type is a generic set, i.e. `Set[T]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is set - - -def unwrap_generic_set(typ: type[set[T]]) -> type[T]: - """ - Extracts the item type of a set type. - - :param typ: The set type `Set[T]`. - :returns: The item type `T`. - """ - - return rewrap_annotated_type(_unwrap_generic_set, typ) - - -def _unwrap_generic_set(typ: type[set[T]]) -> type[T]: - "Extracts the item type of a set type (e.g. returns `T` for `Set[T]`)." - - (set_type,) = typing.get_args(typ) # unpack single tuple element - return set_type # type: ignore[no-any-return] - - -def is_generic_dict(typ: object) -> TypeGuard[type[dict]]: - "True if the specified type is a generic dictionary, i.e. `Dict[KeyType, ValueType]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is dict - - -def unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]: - """ - Extracts the key and value types of a dictionary type as a tuple. - - :param typ: The dictionary type `Dict[K, V]`. - :returns: The key and value types `K` and `V`. - """ - - return _unwrap_generic_dict(unwrap_annotated_type(typ)) - - -def _unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]: - "Extracts the key and value types of a dict type (e.g. returns (`K`, `V`) for `Dict[K, V]`)." - - key_type, value_type = typing.get_args(typ) - return key_type, value_type - - -def is_type_annotated(typ: TypeLike) -> bool: - "True if the type annotation corresponds to an annotated type (i.e. `Annotated[T, ...]`)." - - return getattr(typ, "__metadata__", None) is not None - - -def get_annotation(data_type: TypeLike, annotation_type: type[T]) -> T | None: - """ - Returns the first annotation on a data type that matches the expected annotation type. - - :param data_type: The annotated type from which to extract the annotation. 
- :param annotation_type: The annotation class to look for. - :returns: The annotation class instance found (if any). - """ - - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - for annotation in metadata: - if isinstance(annotation, annotation_type): - return annotation - - return None - - -def unwrap_annotated_type(typ: T) -> T: - "Extracts the wrapped type from an annotated type (e.g. returns `T` for `Annotated[T, ...]`)." - - if is_type_annotated(typ): - # type is Annotated[T, ...] - return typing.get_args(typ)[0] # type: ignore[no-any-return] - else: - # type is a regular type - return typ - - -def rewrap_annotated_type(transform: Callable[[type[S]], type[T]], typ: type[S]) -> type[T]: - """ - Un-boxes, transforms and re-boxes an optionally annotated type. - - :param transform: A function that maps an un-annotated type to another type. - :param typ: A type to un-box (if necessary), transform, and re-box (if necessary). - """ - - metadata = getattr(typ, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - inner_type = typing.get_args(typ)[0] - else: - # type is a regular type - inner_type = typ - - transformed_type = transform(inner_type) - - if metadata is not None: - return Annotated[(transformed_type, *metadata)] # type: ignore[return-value] - else: - return transformed_type - - -def get_module_classes(module: types.ModuleType) -> list[type]: - "Returns all classes declared directly in a module." - - def is_class_member(member: object) -> TypeGuard[type]: - return inspect.isclass(member) and member.__module__ == module.__name__ - - return [class_type for _, class_type in inspect.getmembers(module, is_class_member)] - - -if sys.version_info >= (3, 9): - - def get_resolved_hints(typ: type) -> dict[str, type]: - return typing.get_type_hints(typ, include_extras=True) - -else: - - def get_resolved_hints(typ: type) -> dict[str, type]: - return typing.get_type_hints(typ) - - -def get_class_properties(typ: type) -> Iterable[tuple[str, type | str]]: - "Returns all properties of a class." - - if is_dataclass_type(typ): - return ((field.name, field.type) for field in dataclasses.fields(typ)) - elif hasattr(typ, "model_fields"): - # Pydantic BaseModel - use model_fields to exclude ClassVar and other non-field attributes - # Reconstruct Annotated type if discriminator exists to preserve metadata - from typing import Annotated, Any - - from pydantic.fields import FieldInfo - - def get_field_type(name: str, field: Any) -> type | str: - # If field has discriminator, wrap in Annotated to preserve it for schema generation - if field.discriminator: - field_info = FieldInfo(annotation=None, discriminator=field.discriminator) - # Annotated returns _AnnotatedAlias which isn't a type but is valid here - return Annotated[field.annotation, field_info] # type: ignore[return-value] - # field.annotation can be Union types, Annotated, etc. which aren't type but are valid - return field.annotation # type: ignore[return-value,no-any-return] - - return ((name, get_field_type(name, field)) for name, field in typ.model_fields.items()) - else: - resolved_hints = get_resolved_hints(typ) - return resolved_hints.items() - - -def get_class_property(typ: type, name: str) -> type | str | None: - "Looks up the annotated type of a property in a class by its property name." 
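`get_class_properties` above dispatches on the kind of class: dataclasses go through `dataclasses.fields()`, Pydantic models through `model_fields`, and plain annotated classes through resolved type hints. A stdlib-only sketch of the first and last branches (the `Point` and `Config` classes are illustrative):

```python
import dataclasses
import typing


@dataclasses.dataclass
class Point:
    x: int
    y: int


class Config:
    name: str
    retries: int = 3


# dataclass branch: property names and types come from dataclasses.fields()
assert [(f.name, f.type) for f in dataclasses.fields(Point)] == [("x", int), ("y", int)]

# fallback branch for plain classes: resolved type hints
assert typing.get_type_hints(Config) == {"name": str, "retries": int}
```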
- - for property_name, property_type in get_class_properties(typ): - if name == property_name: - return property_type - return None - - -@dataclasses.dataclass -class _ROOT: - pass - - -def get_referenced_types(typ: TypeLike, module: types.ModuleType | None = None) -> set[type]: - """ - Extracts types directly or indirectly referenced by this type. - - For example, extract `T` from `List[T]`, `Optional[T]` or `Annotated[T, ...]`, `K` and `V` from `Dict[K,V]`, - `A` and `B` from `Union[A,B]`. - - :param typ: A type or special form. - :param module: The context in which types are evaluated. - :returns: Types referenced by the given type or special form. - """ - - collector = TypeCollector() - collector.run(typ, _ROOT, module) - return collector.references - - -class TypeCollector: - """ - Collects types directly or indirectly referenced by a type. - - :param graph: The type dependency graph, linking types to types they depend on. - """ - - graph: dict[type, set[type]] - - @property - def references(self) -> set[type]: - "Types collected by the type collector." - - dependencies = set() - for edges in self.graph.values(): - dependencies.update(edges) - return dependencies - - def __init__(self) -> None: - self.graph = {_ROOT: set()} - - def traverse(self, typ: type) -> None: - "Finds all dependent types of a type." - - self.run(typ, _ROOT, sys.modules[typ.__module__]) - - def traverse_all(self, types: Iterable[type]) -> None: - "Finds all dependent types of a list of types." - - for typ in types: - self.traverse(typ) - - def run( - self, - typ: TypeLike, - cls: type[DataclassInstance], - module: types.ModuleType | None, - ) -> None: - """ - Extracts types indirectly referenced by this type. - - For example, extract `T` from `List[T]`, `Optional[T]` or `Annotated[T, ...]`, `K` and `V` from `Dict[K,V]`, - `A` and `B` from `Union[A,B]`. - - :param typ: A type or special form. - :param cls: A dataclass type being expanded for dependent types. - :param module: The context in which types are evaluated. - :returns: Types referenced by the given type or special form. - """ - - if typ is type(None) or typ is Any: - return - - if isinstance(typ, type): - self.graph[cls].add(typ) - - if typ in self.graph: - return - - self.graph[typ] = set() - - metadata = getattr(typ, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] 
- arg = typing.get_args(typ)[0] - return self.run(arg, cls, module) - - # type is a forward reference - if isinstance(typ, str) or isinstance(typ, typing.ForwardRef): - if module is None: - raise ValueError("missing context for evaluating types") - - evaluated_type = evaluate_type(typ, module) - return self.run(evaluated_type, cls, module) - - # type is a special form - origin = typing.get_origin(typ) - if origin in [list, dict, frozenset, set, tuple, Union]: - for arg in typing.get_args(typ): - self.run(arg, cls, module) - return - elif origin is Literal: - return - - # type is optional or a union type - if is_type_optional(typ): - return self.run(unwrap_optional_type(typ), cls, module) - if is_type_union(typ): - for union_type in unwrap_union_types(typ): - self.run(union_type, cls, module) - return - - # type is a regular type - elif is_dataclass_type(typ) or is_type_enum(typ) or isinstance(typ, type): - context = sys.modules[typ.__module__] - if is_dataclass_type(typ): - for field in dataclass_fields(typ): - self.run(field.type, typ, context) - else: - for field_name, field_type in get_resolved_hints(typ).items(): - self.run(field_type, typ, context) - return - - raise TypeError(f"expected: type-like; got: {typ}") - - -if sys.version_info >= (3, 10): - - def get_signature(fn: Callable[..., Any]) -> inspect.Signature: - "Extracts the signature of a function." - - return inspect.signature(fn, eval_str=True) - -else: - - def get_signature(fn: Callable[..., Any]) -> inspect.Signature: - "Extracts the signature of a function." - - return inspect.signature(fn) - - -def is_reserved_property(name: str) -> bool: - "True if the name stands for an internal property." - - # filter built-in and special properties - if re.match(r"^__.+__$", name): - return True - - # filter built-in special names - if name in ["_abc_impl"]: - return True - - return False - - -def create_module(name: str) -> types.ModuleType: - """ - Creates a new module dynamically at run-time. - - :param name: Fully qualified name of the new module (with dot notation). - """ - - if name in sys.modules: - raise KeyError(f"{name!r} already in sys.modules") - - spec = importlib.machinery.ModuleSpec(name, None) - module = importlib.util.module_from_spec(spec) - sys.modules[name] = module - if spec.loader is not None: - spec.loader.exec_module(module) - return module - - -if sys.version_info >= (3, 10): - - def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type: - """ - Creates a new data-class type dynamically. - - :param class_name: The name of new data-class type. - :param fields: A list of fields (and their type) that the new data-class type is expected to have. - :returns: The newly created data-class type. - """ - - # has the `slots` parameter - return dataclasses.make_dataclass(class_name, fields, slots=True) - -else: - - def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type: - """ - Creates a new data-class type dynamically. - - :param class_name: The name of new data-class type. - :param fields: A list of fields (and their type) that the new data-class type is expected to have. - :returns: The newly created data-class type. 
- """ - - cls = dataclasses.make_dataclass(class_name, fields) - - cls_dict = dict(cls.__dict__) - field_names = tuple(field.name for field in dataclasses.fields(cls)) - - cls_dict["__slots__"] = field_names - - for field_name in field_names: - cls_dict.pop(field_name, None) - cls_dict.pop("__dict__", None) - - qualname = getattr(cls, "__qualname__", None) - cls = type(cls)(cls.__name__, (), cls_dict) - if qualname is not None: - cls.__qualname__ = qualname - - return cls - - -def create_object(typ: type[T]) -> T: - "Creates an instance of a type." - - if issubclass(typ, Exception): - # exception types need special treatment - e = typ.__new__(typ) - return typing.cast(T, e) - else: - return object.__new__(typ) - - -if sys.version_info >= (3, 9): - TypeOrGeneric = Union[type, types.GenericAlias] - -else: - TypeOrGeneric = object - - -def is_generic_instance(obj: Any, typ: TypeLike) -> bool: - """ - Returns whether an object is an instance of a generic class, a standard class or of a subclass thereof. - - This function checks the following items recursively: - * items of a list - * keys and values of a dictionary - * members of a set - * items of a tuple - * members of a union type - - :param obj: The (possibly generic container) object to check recursively. - :param typ: The expected type of the object. - """ - - if isinstance(typ, typing.ForwardRef): - fwd: typing.ForwardRef = typ - identifier = fwd.__forward_arg__ - typ = eval(identifier) - if isinstance(typ, type): - return isinstance(obj, typ) - else: - return False - - # generic types (e.g. list, dict, set, etc.) - origin_type = typing.get_origin(typ) - if origin_type is list: - if not isinstance(obj, list): - return False - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - list_obj: list = obj - return all(is_generic_instance(item, list_item_type) for item in list_obj) - elif origin_type is dict: - if not isinstance(obj, dict): - return False - key_type, value_type = typing.get_args(typ) - dict_obj: dict = obj - return all( - is_generic_instance(key, key_type) and is_generic_instance(value, value_type) - for key, value in dict_obj.items() - ) - elif origin_type is set: - if not isinstance(obj, set): - return False - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - set_obj: set = obj - return all(is_generic_instance(item, set_member_type) for item in set_obj) - elif origin_type is tuple: - if not isinstance(obj, tuple): - return False - return all( - is_generic_instance(item, tuple_item_type) - for tuple_item_type, item in zip( - (tuple_item_type for tuple_item_type in typing.get_args(typ)), - (item for item in obj), - strict=False, - ) - ) - elif origin_type is Union: - return any(is_generic_instance(obj, member_type) for member_type in typing.get_args(typ)) - elif isinstance(typ, type): - return isinstance(obj, typ) - else: - raise TypeError(f"expected `type` but got: {typ}") - - -class RecursiveChecker: - _pred: Callable[[type, Any], bool] | None - - def __init__(self, pred: Callable[[type, Any], bool]) -> None: - """ - Creates a checker to verify if a predicate applies to all nested member properties of an object recursively. - - :param pred: The predicate to test on member properties. Takes a property type and a property value. - """ - - self._pred = pred - - def pred(self, typ: type, obj: Any) -> bool: - "Acts as a workaround for the type checker mypy." 
- - assert self._pred is not None - return self._pred(typ, obj) - - def check(self, typ: TypeLike, obj: Any) -> bool: - """ - Checks if a predicate applies to all nested member properties of an object recursively. - - :param typ: The type to recurse into. - :param obj: The object to inspect recursively. Must be an instance of the given type. - :returns: True if all member properties pass the filter predicate. - """ - - # check for well-known types - if ( - typ is type(None) - or typ is bool - or typ is int - or typ is float - or typ is str - or typ is bytes - or typ is datetime.datetime - or typ is datetime.date - or typ is datetime.time - or typ is uuid.UUID - ): - return self.pred(typing.cast(type, typ), obj) - - # generic types (e.g. list, dict, set, etc.) - origin_type = typing.get_origin(typ) - if origin_type is list: - if not isinstance(obj, list): - raise TypeError(f"expected `list` but got: {obj}") - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - list_obj: list = obj - return all(self.check(list_item_type, item) for item in list_obj) - elif origin_type is dict: - if not isinstance(obj, dict): - raise TypeError(f"expected `dict` but got: {obj}") - key_type, value_type = typing.get_args(typ) - dict_obj: dict = obj - return all(self.check(value_type, item) for item in dict_obj.values()) - elif origin_type is set: - if not isinstance(obj, set): - raise TypeError(f"expected `set` but got: {obj}") - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - set_obj: set = obj - return all(self.check(set_member_type, item) for item in set_obj) - elif origin_type is tuple: - if not isinstance(obj, tuple): - raise TypeError(f"expected `tuple` but got: {obj}") - return all( - self.check(tuple_item_type, item) - for tuple_item_type, item in zip( - (tuple_item_type for tuple_item_type in typing.get_args(typ)), - (item for item in obj), - strict=False, - ) - ) - elif origin_type is Union: - return self.pred(typ, obj) # type: ignore[arg-type] - - if not inspect.isclass(typ): - raise TypeError(f"expected `type` but got: {typ}") - - # enumeration type - if issubclass(typ, enum.Enum): - if not isinstance(obj, enum.Enum): - raise TypeError(f"expected `{typ}` but got: {obj}") - return self.pred(typ, obj) - - # class types with properties - if is_named_tuple_type(typ): - if not isinstance(obj, tuple): - raise TypeError(f"expected `NamedTuple` but got: {obj}") - return all( - self.check(field_type, getattr(obj, field_name)) - for field_name, field_type in typing.get_type_hints(typ).items() - ) - elif is_dataclass_type(typ): - if not isinstance(obj, typ): - raise TypeError(f"expected `{typ}` but got: {obj}") - resolved_hints = get_resolved_hints(typ) - return all( - self.check(resolved_hints[field.name], getattr(obj, field.name)) for field in dataclasses.fields(typ) - ) - else: - if not isinstance(obj, typ): - raise TypeError(f"expected `{typ}` but got: {obj}") - return all( - self.check(property_type, getattr(obj, property_name)) - for property_name, property_type in get_class_properties(typ) - ) - - -def check_recursive( - obj: object, - /, - *, - pred: Callable[[type, Any], bool] | None = None, - type_pred: Callable[[type], bool] | None = None, - value_pred: Callable[[Any], bool] | None = None, -) -> bool: - """ - Checks if a predicate applies to all nested member properties of an object recursively. - - :param obj: The object to inspect recursively. - :param pred: The predicate to test on member properties. Takes a property type and a property value. 
- :param type_pred: Constrains the check to properties of an expected type. Properties of other types pass automatically. - :param value_pred: Verifies a condition on member property values (of an expected type). - :returns: True if all member properties pass the filter predicate(s). - """ - - if type_pred is not None and value_pred is not None: - if pred is not None: - raise TypeError("filter predicate not permitted when type and value predicates are present") - - type_p: Callable[[type[T]], bool] = type_pred - value_p: Callable[[T], bool] = value_pred - pred = lambda typ, obj: not type_p(typ) or value_p(obj) # noqa: E731 - - elif value_pred is not None: - if pred is not None: - raise TypeError("filter predicate not permitted when value predicate is present") - - value_only_p: Callable[[T], bool] = value_pred - pred = lambda typ, obj: value_only_p(obj) # noqa: E731 - - elif type_pred is not None: - raise TypeError("value predicate required when type predicate is present") - - elif pred is None: - pred = lambda typ, obj: True # noqa: E731 - - return RecursiveChecker(pred).check(type(obj), obj) - - -def is_unwrapped_body_param(param_type: Any) -> bool: - """ - Check if a parameter type represents an unwrapped body parameter. - An unwrapped body parameter is an Annotated type with Body(embed=False) - - This is used to determine whether request parameters should be flattened - in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior). - - Args: - param_type: The parameter type annotation to check - - Returns: - True if the parameter should be treated as an unwrapped body parameter - """ - # Check if it's Annotated with Body(embed=False) - if typing.get_origin(param_type) is Annotated: - args = typing.get_args(param_type) - base_type = args[0] - metadata = args[1:] - - # Look for Body annotation with embed=False - # Body() returns a FieldInfo object, so we check for that type and the embed attribute - for item in metadata: - if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed: - return inspect.isclass(base_type) and issubclass(base_type, BaseModel) - - return False diff --git a/src/llama_stack_api/strong_typing/mapping.py b/src/llama_stack_api/strong_typing/mapping.py deleted file mode 100644 index d6c1a3172..000000000 --- a/src/llama_stack_api/strong_typing/mapping.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import keyword - -from .auxiliary import Alias -from .inspection import get_annotation - - -def python_field_to_json_property(python_id: str, python_type: object | None = None) -> str: - """ - Map a Python field identifier to a JSON property name. - - Authors may use an underscore appended at the end of a Python identifier as per PEP 8 if it clashes with a Python - keyword: e.g. `in` would become `in_` and `from` would become `from_`. Remove these suffixes when exporting to JSON. - - Authors may supply an explicit alias with the type annotation `Alias`, e.g. `Annotated[MyType, Alias("alias")]`. 
- """ - - if python_type is not None: - alias = get_annotation(python_type, Alias) - if alias: - return alias.name - - if python_id.endswith("_"): - id = python_id[:-1] - if keyword.iskeyword(id): - return id - - return python_id diff --git a/src/llama_stack_api/strong_typing/name.py b/src/llama_stack_api/strong_typing/name.py deleted file mode 100644 index 60501ac43..000000000 --- a/src/llama_stack_api/strong_typing/name.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import typing -from typing import Any, Literal, Union - -from .auxiliary import _auxiliary_types -from .inspection import ( - TypeLike, - is_generic_dict, - is_generic_list, - is_generic_sequence, - is_type_optional, - is_type_union, - unwrap_generic_dict, - unwrap_generic_list, - unwrap_generic_sequence, - unwrap_optional_type, - unwrap_union_types, -) - - -class TypeFormatter: - """ - Type formatter. - - :param use_union_operator: Whether to emit union types as `X | Y` as per PEP 604. - """ - - use_union_operator: bool - - def __init__(self, use_union_operator: bool = False) -> None: - self.use_union_operator = use_union_operator - - def union_to_str(self, data_type_args: tuple[TypeLike, ...]) -> str: - if self.use_union_operator: - return " | ".join(self.python_type_to_str(t) for t in data_type_args) - else: - if len(data_type_args) == 2 and type(None) in data_type_args: - # Optional[T] is represented as Union[T, None] - origin_name = "Optional" - data_type_args = tuple(t for t in data_type_args if t is not type(None)) - else: - origin_name = "Union" - - args = ", ".join(self.python_type_to_str(t) for t in data_type_args) - return f"{origin_name}[{args}]" - - def plain_type_to_str(self, data_type: TypeLike) -> str: - "Returns the string representation of a Python type without metadata." - - # return forward references as the annotation string - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - return fwd.__forward_arg__ - elif isinstance(data_type, str): - return data_type - - origin = typing.get_origin(data_type) - if origin is not None: - data_type_args = typing.get_args(data_type) - - if origin is dict: # Dict[T] - origin_name = "Dict" - elif origin is list: # List[T] - origin_name = "List" - elif origin is set: # Set[T] - origin_name = "Set" - elif origin is Union: - return self.union_to_str(data_type_args) - elif origin is Literal: - args = ", ".join(repr(arg) for arg in data_type_args) - return f"Literal[{args}]" - else: - origin_name = origin.__name__ - - args = ", ".join(self.python_type_to_str(t) for t in data_type_args) - return f"{origin_name}[{args}]" - - return data_type.__name__ - - def python_type_to_str(self, data_type: TypeLike) -> str: - "Returns the string representation of a Python type." - - if data_type is type(None): - return "None" - - # use compact name for alias types - name = _auxiliary_types.get(data_type) - if name is not None: - return name - - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - metatuple: tuple[Any, ...] 
= metadata - arg = typing.get_args(data_type)[0] - - # check for auxiliary types with user-defined annotations - metaset = set(metatuple) - for auxiliary_type, auxiliary_name in _auxiliary_types.items(): - auxiliary_arg = typing.get_args(auxiliary_type)[0] - if arg is not auxiliary_arg: - continue - - auxiliary_metatuple: tuple[Any, ...] | None = getattr(auxiliary_type, "__metadata__", None) - if auxiliary_metatuple is None: - continue - - if metaset.issuperset(auxiliary_metatuple): - # type is an auxiliary type with extra annotations - auxiliary_args = ", ".join(repr(m) for m in metatuple if m not in auxiliary_metatuple) - return f"Annotated[{auxiliary_name}, {auxiliary_args}]" - - # type is an annotated type - args = ", ".join(repr(m) for m in metatuple) - return f"Annotated[{self.plain_type_to_str(arg)}, {args}]" - else: - # type is a regular type - return self.plain_type_to_str(data_type) - - -def python_type_to_str(data_type: TypeLike, use_union_operator: bool = False) -> str: - """ - Returns the string representation of a Python type. - - :param use_union_operator: Whether to emit union types as `X | Y` as per PEP 604. - """ - - fmt = TypeFormatter(use_union_operator) - return fmt.python_type_to_str(data_type) - - -def python_type_to_name(data_type: TypeLike, force: bool = False) -> str: - """ - Returns the short name of a Python type. - - :param force: Whether to produce a name for composite types such as generics. - """ - - # use compact name for alias types - name = _auxiliary_types.get(data_type) - if name is not None: - return name - - # unwrap annotated types - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - arg = typing.get_args(data_type)[0] - return python_type_to_name(arg, force=force) - - if force: - # generic types - if is_type_optional(data_type, strict=True): - inner_name = python_type_to_name(unwrap_optional_type(data_type), force=True) - return f"Optional__{inner_name}" - elif is_generic_list(data_type): - item_name = python_type_to_name(unwrap_generic_list(data_type), force=True) - return f"List__{item_name}" - elif is_generic_sequence(data_type): - # Treat Sequence the same as List for schema generation purposes - item_name = python_type_to_name(unwrap_generic_sequence(data_type), force=True) - return f"List__{item_name}" - elif is_generic_dict(data_type): - key_type, value_type = unwrap_generic_dict(data_type) - key_name = python_type_to_name(key_type, force=True) - value_name = python_type_to_name(value_type, force=True) - return f"Dict__{key_name}__{value_name}" - elif is_type_union(data_type): - member_types = unwrap_union_types(data_type) - member_names = "__".join(python_type_to_name(member_type, force=True) for member_type in member_types) - return f"Union__{member_names}" - - # named system or user-defined type - if hasattr(data_type, "__name__") and not typing.get_args(data_type): - return data_type.__name__ - - raise TypeError(f"cannot assign a simple name to type: {data_type}") diff --git a/src/llama_stack_api/strong_typing/schema.py b/src/llama_stack_api/strong_typing/schema.py deleted file mode 100644 index 916690e41..000000000 --- a/src/llama_stack_api/strong_typing/schema.py +++ /dev/null @@ -1,791 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. 
- -:see: https://github.com/hunyadi/strong_typing -""" - -import collections.abc -import dataclasses -import datetime -import decimal -import enum -import functools -import inspect -import json -import types -import typing -import uuid -from collections.abc import Callable -from copy import deepcopy -from typing import ( - Annotated, - Any, - ClassVar, - Literal, - TypeVar, - Union, - overload, -) - -import jsonschema - -from . import docstring -from .auxiliary import ( - Alias, - IntegerRange, - MaxLength, - MinLength, - Precision, - get_auxiliary_format, -) -from .core import JsonArray, JsonObject, JsonType, Schema, StrictJsonType -from .inspection import ( - TypeLike, - enum_value_types, - get_annotation, - get_class_properties, - is_type_enum, - is_type_like, - is_type_optional, - unwrap_optional_type, -) -from .name import python_type_to_name -from .serialization import object_to_json - -# determines the maximum number of distinct enum members up to which a Dict[EnumType, Any] is converted into a JSON -# schema with explicitly listed properties (rather than employing a pattern constraint on property names) -OBJECT_ENUM_EXPANSION_LIMIT = 4 - - -T = TypeVar("T") - - -def get_class_docstrings(data_type: type) -> tuple[str | None, str | None]: - docstr = docstring.parse_type(data_type) - - # check if class has a doc-string other than the auto-generated string assigned by @dataclass - if docstring.has_default_docstring(data_type): - return None, None - - return docstr.short_description, docstr.long_description - - -def get_class_property_docstrings( - data_type: type, transform_fun: Callable[[type, str, str], str] | None = None -) -> dict[str, str]: - """ - Extracts the documentation strings associated with the properties of a composite type. - - :param data_type: The object whose properties to iterate over. - :param transform_fun: An optional function that maps a property documentation string to a custom tailored string. - :returns: A dictionary mapping property names to descriptions. - """ - - result: dict[str, str] = {} - # Only try to get MRO if data_type is actually a class - # Special types like Literal, Union, etc. don't have MRO - if not inspect.isclass(data_type): - return result - - for base in inspect.getmro(data_type): - docstr = docstring.parse_type(base) - for param in docstr.params.values(): - if param.name in result: - continue - - if transform_fun: - description = transform_fun(data_type, param.name, param.description) - else: - description = param.description - - result[param.name] = description - return result - - -def docstring_to_schema(data_type: type) -> Schema: - short_description, long_description = get_class_docstrings(data_type) - schema: Schema = { - "title": python_type_to_name(data_type, force=True), - } - - description = "\n".join(filter(None, [short_description, long_description])) - if description: - schema["description"] = description - return schema - - -def id_from_ref(data_type: typing.ForwardRef | str | type) -> str: - "Extracts the name of a possibly forward-referenced type." - - if isinstance(data_type, typing.ForwardRef): - forward_type: typing.ForwardRef = data_type - return forward_type.__forward_arg__ - elif isinstance(data_type, str): - return data_type - else: - return data_type.__name__ - - -def type_from_ref(data_type: typing.ForwardRef | str | type) -> tuple[str, type]: - "Creates a type from a forward reference." 
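`type_from_ref` (whose body follows) recovers both the referenced name and the evaluated type. A standalone sketch of the evaluation it performs on a `typing.ForwardRef` (the `Celsius` alias is illustrative):

```python
import typing

Celsius = float  # a named type that the forward reference should resolve to

ref = typing.ForwardRef("Celsius")
# pair the textual name with the type obtained by evaluating the compiled
# forward-reference expression in the surrounding namespace
name, resolved = ref.__forward_arg__, eval(ref.__forward_code__)
assert (name, resolved) == ("Celsius", float)
```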
- - if isinstance(data_type, typing.ForwardRef): - forward_type: typing.ForwardRef = data_type - true_type = eval(forward_type.__forward_code__) - return forward_type.__forward_arg__, true_type - elif isinstance(data_type, str): - true_type = eval(data_type) - return data_type, true_type - else: - return data_type.__name__, data_type - - -@dataclasses.dataclass -class TypeCatalogEntry: - schema: Schema | None - identifier: str - examples: JsonType | None = None - - -class TypeCatalog: - "Maintains an association of well-known Python types to their JSON schema." - - _by_type: dict[TypeLike, TypeCatalogEntry] - _by_name: dict[str, TypeCatalogEntry] - - def __init__(self) -> None: - self._by_type = {} - self._by_name = {} - - def __contains__(self, data_type: TypeLike) -> bool: - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - name = fwd.__forward_arg__ - return name in self._by_name - else: - return data_type in self._by_type - - def add( - self, - data_type: TypeLike, - schema: Schema | None, - identifier: str, - examples: list[JsonType] | None = None, - ) -> None: - if isinstance(data_type, typing.ForwardRef): - raise TypeError("forward references cannot be used to register a type") - - if data_type in self._by_type: - raise ValueError(f"type {data_type} is already registered in the catalog") - - entry = TypeCatalogEntry(schema, identifier, examples) - self._by_type[data_type] = entry - self._by_name[identifier] = entry - - def get(self, data_type: TypeLike) -> TypeCatalogEntry: - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - name = fwd.__forward_arg__ - return self._by_name[name] - else: - return self._by_type[data_type] - - -@dataclasses.dataclass -class SchemaOptions: - definitions_path: str = "#/definitions/" - use_descriptions: bool = True - use_examples: bool = True - property_description_fun: Callable[[type, str, str], str] | None = None - - -class JsonSchemaGenerator: - "Creates a JSON schema with user-defined type definitions." - - type_catalog: ClassVar[TypeCatalog] = TypeCatalog() - types_used: dict[str, TypeLike] - options: SchemaOptions - - def __init__(self, options: SchemaOptions | None = None): - if options is None: - self.options = SchemaOptions() - else: - self.options = options - self.types_used = {} - - @functools.singledispatchmethod - def _metadata_to_schema(self, arg: object) -> Schema: - # unrecognized annotation - return {} - - @_metadata_to_schema.register - def _(self, arg: IntegerRange) -> Schema: - return {"minimum": arg.minimum, "maximum": arg.maximum} - - @_metadata_to_schema.register - def _(self, arg: Precision) -> Schema: - return { - "multipleOf": 10 ** (-arg.decimal_digits), - "exclusiveMinimum": -(10**arg.integer_digits), - "exclusiveMaximum": (10**arg.integer_digits), - } - - @_metadata_to_schema.register - def _(self, arg: MinLength) -> Schema: - return {"minLength": arg.value} - - @_metadata_to_schema.register - def _(self, arg: MaxLength) -> Schema: - return {"maxLength": arg.value} - - def _with_metadata(self, type_schema: Schema, metadata: tuple[Any, ...] | None) -> Schema: - if metadata: - for m in metadata: - type_schema.update(self._metadata_to_schema(m)) - return type_schema - - def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: dict | None = None) -> Schema | None: - """ - Returns the JSON schema associated with a simple, unrestricted type. - - :returns: The schema for a simple type, or `None`. 
- """ - - if typ is type(None): - return {"type": "null"} - elif typ is bool: - return {"type": "boolean"} - elif typ is int: - return {"type": "integer"} - elif typ is float: - return {"type": "number"} - elif typ is str: - if json_schema_extra and "contentEncoding" in json_schema_extra: - return { - "type": "string", - "contentEncoding": json_schema_extra["contentEncoding"], - } - return {"type": "string"} - elif typ is bytes: - return {"type": "string", "contentEncoding": "base64"} - elif typ is datetime.datetime: - # 2018-11-13T20:20:39+00:00 - return { - "type": "string", - "format": "date-time", - } - elif typ is datetime.date: - # 2018-11-13 - return {"type": "string", "format": "date"} - elif typ is datetime.time: - # 20:20:39+00:00 - return {"type": "string", "format": "time"} - elif typ is decimal.Decimal: - return {"type": "number"} - elif typ is uuid.UUID: - # f81d4fae-7dec-11d0-a765-00a0c91e6bf6 - return {"type": "string", "format": "uuid"} - elif typ is Any: - return { - "oneOf": [ - {"type": "null"}, - {"type": "boolean"}, - {"type": "number"}, - {"type": "string"}, - {"type": "array"}, - {"type": "object"}, - ] - } - elif typ is JsonObject: - return {"type": "object"} - elif typ is JsonArray: - return {"type": "array"} - else: - # not a simple type - return None - - def type_to_schema( - self, - data_type: TypeLike, - force_expand: bool = False, - json_schema_extra: dict | None = None, - ) -> Schema: - common_info = {} - if json_schema_extra and "deprecated" in json_schema_extra: - common_info["deprecated"] = json_schema_extra["deprecated"] - return self._type_to_schema(data_type, force_expand, json_schema_extra) | common_info - - def _type_to_schema( - self, - data_type: TypeLike, - force_expand: bool = False, - json_schema_extra: dict | None = None, - ) -> Schema: - """ - Returns the JSON schema associated with a type. - - :param data_type: The Python type whose JSON schema to return. - :param force_expand: Forces a JSON schema to be returned even if the type is registered in the catalog of known types. - :returns: The JSON schema associated with the type. - """ - - # short-circuit for common simple types - schema = self._simple_type_to_schema(data_type, json_schema_extra) - if schema is not None: - return schema - - # types registered in the type catalog of well-known types - type_catalog = JsonSchemaGenerator.type_catalog - if not force_expand and data_type in type_catalog: - # user-defined type - identifier = type_catalog.get(data_type).identifier - self.types_used.setdefault(identifier, data_type) - return {"$ref": f"{self.options.definitions_path}{identifier}"} - - # unwrap annotated types - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] 
- typ = typing.get_args(data_type)[0] - schema = self._simple_type_to_schema(typ) - if schema is not None: - # recognize well-known auxiliary types - fmt = get_auxiliary_format(data_type) - if fmt is not None: - schema.update({"format": fmt}) - return schema - else: - return self._with_metadata(schema, metadata) - - else: - # type is a regular type - typ = data_type - - if isinstance(typ, typing.ForwardRef) or isinstance(typ, str): - if force_expand: - identifier, true_type = type_from_ref(typ) - return self.type_to_schema(true_type, force_expand=True) - else: - try: - identifier, true_type = type_from_ref(typ) - self.types_used[identifier] = true_type - except NameError: - identifier = id_from_ref(typ) - - return {"$ref": f"{self.options.definitions_path}{identifier}"} - - if is_type_enum(typ): - enum_type: type[enum.Enum] = typ - value_types = enum_value_types(enum_type) - if len(value_types) != 1: - raise ValueError( - f"enumerations must have a consistent member value type but several types found: {value_types}" - ) - enum_value_type = value_types.pop() - - enum_schema: Schema - if enum_value_type is bool or enum_value_type is int or enum_value_type is float or enum_value_type is str: - if enum_value_type is bool: - enum_schema_type = "boolean" - elif enum_value_type is int: - enum_schema_type = "integer" - elif enum_value_type is float: - enum_schema_type = "number" - elif enum_value_type is str: - enum_schema_type = "string" - - enum_schema = { - "type": enum_schema_type, - "enum": [object_to_json(e.value) for e in enum_type], - } - if self.options.use_descriptions: - enum_schema.update(docstring_to_schema(typ)) - return enum_schema - else: - enum_schema = self.type_to_schema(enum_value_type) - if self.options.use_descriptions: - enum_schema.update(docstring_to_schema(typ)) - return enum_schema - - origin_type = typing.get_origin(typ) - if origin_type is list: - (list_type,) = typing.get_args(typ) # unpack single tuple element - return {"type": "array", "items": self.type_to_schema(list_type)} - elif origin_type is collections.abc.Sequence: - # Treat Sequence the same as list for JSON schema (both are arrays) - (sequence_type,) = typing.get_args(typ) # unpack single tuple element - return {"type": "array", "items": self.type_to_schema(sequence_type)} - elif origin_type is dict: - key_type, value_type = typing.get_args(typ) - if not (key_type is str or key_type is int or is_type_enum(key_type)): - raise ValueError("`dict` with key type not coercible to `str` is not supported") - - dict_schema: Schema - value_schema = self.type_to_schema(value_type) - if is_type_enum(key_type): - enum_values = [str(e.value) for e in key_type] - if len(enum_values) > OBJECT_ENUM_EXPANSION_LIMIT: - dict_schema = { - "propertyNames": {"pattern": "^(" + "|".join(enum_values) + ")$"}, - "additionalProperties": value_schema, - } - else: - dict_schema = { - "properties": dict.fromkeys(enum_values, value_schema), - "additionalProperties": False, - } - else: - dict_schema = {"additionalProperties": value_schema} - - schema = {"type": "object"} - schema.update(dict_schema) - return schema - elif origin_type is set: - (set_type,) = typing.get_args(typ) # unpack single tuple element - return { - "type": "array", - "items": self.type_to_schema(set_type), - "uniqueItems": True, - } - elif origin_type is tuple: - args = typing.get_args(typ) - return { - "type": "array", - "minItems": len(args), - "maxItems": len(args), - "prefixItems": [self.type_to_schema(member_type) for member_type in args], - } - elif origin_type 
in (Union, types.UnionType): - discriminator = None - if typing.get_origin(data_type) is Annotated: - discriminator = typing.get_args(data_type)[1].discriminator - ret: Schema = {"oneOf": [self.type_to_schema(union_type) for union_type in typing.get_args(typ)]} - if discriminator: - # for each union type, we need to read the value of the discriminator - mapping: dict[str, JsonType] = {} - for union_type in typing.get_args(typ): - props = self.type_to_schema(union_type, force_expand=True)["properties"] - # mypy is confused here because JsonType allows multiple types, some of them - # not indexable (bool?) or not indexable by string (list?). The correctness of - # types depends on correct model definitions. Hence multiple ignore statements below. - discriminator_value = props[discriminator]["default"] # type: ignore[index,call-overload] - mapping[discriminator_value] = self.type_to_schema(union_type)["$ref"] # type: ignore[index] - - ret["discriminator"] = { - "propertyName": discriminator, - "mapping": mapping, - } - return ret - elif origin_type is Literal: - literal_args = typing.get_args(typ) - if len(literal_args) == 1: - (literal_value,) = literal_args - schema = self.type_to_schema(type(literal_value)) - schema["const"] = literal_value - return schema - elif len(literal_args) > 1: - first_value = literal_args[0] - schema = self.type_to_schema(type(first_value)) - schema["enum"] = list(literal_args) - return schema - else: - return {"enum": []} - elif origin_type is type: - (concrete_type,) = typing.get_args(typ) # unpack single tuple element - return {"const": self.type_to_schema(concrete_type, force_expand=True)} - elif origin_type is collections.abc.AsyncIterator: - (concrete_type,) = typing.get_args(typ) - return self.type_to_schema(concrete_type) - - # dictionary of class attributes - members = dict(inspect.getmembers(typ, lambda a: not inspect.isroutine(a))) - - property_docstrings = get_class_property_docstrings(typ, self.options.property_description_fun) - properties: dict[str, Schema] = {} - required: list[str] = [] - for property_name, property_type in get_class_properties(typ): - # rename property if an alias name is specified - alias = get_annotation(property_type, Alias) - if alias: - output_name = alias.name - else: - output_name = property_name - - defaults = {} - json_schema_extra = None - if "model_fields" in members: - f = members["model_fields"] - defaults = {k: finfo.default for k, finfo in f.items()} - if output_name in f: - finfo = f[output_name] - json_schema_extra = finfo.json_schema_extra or {} - if finfo.deprecated: - json_schema_extra["deprecated"] = True - - if is_type_optional(property_type): - optional_type: type = unwrap_optional_type(property_type) - property_def = self.type_to_schema(optional_type, json_schema_extra=json_schema_extra) - else: - property_def = self.type_to_schema(property_type, json_schema_extra=json_schema_extra) - required.append(output_name) - - # check if attribute has a default value initializer - if defaults.get(property_name) is not None: - def_value = defaults[property_name] - # check if value can be directly represented in JSON - if isinstance( - def_value, - ( - bool, - int, - float, - str, - enum.Enum, - datetime.datetime, - datetime.date, - datetime.time, - ), - ): - property_def["default"] = object_to_json(def_value) - - # add property docstring if available - property_doc = property_docstrings.get(property_name) - if property_doc: - # print(output_name, property_doc) - property_def.pop("title", None) - 
property_def["description"] = property_doc - - properties[output_name] = property_def - - schema = {"type": "object"} - if len(properties) > 0: - schema["properties"] = typing.cast(JsonType, properties) - schema["additionalProperties"] = False - if len(required) > 0: - schema["required"] = typing.cast(JsonType, required) - if self.options.use_descriptions: - schema.update(docstring_to_schema(typ)) - return schema - - def _type_to_schema_with_lookup(self, data_type: TypeLike) -> Schema: - """ - Returns the JSON schema associated with a type that may be registered in the catalog of known types. - - :param data_type: The type whose JSON schema we seek. - :returns: The JSON schema associated with the type. - """ - - entry = JsonSchemaGenerator.type_catalog.get(data_type) - if entry.schema is None: - type_schema = self.type_to_schema(data_type, force_expand=True) - else: - type_schema = deepcopy(entry.schema) - - # add descriptive text (if present) - if self.options.use_descriptions: - if isinstance(data_type, type) and not isinstance(data_type, typing.ForwardRef): - type_schema.update(docstring_to_schema(data_type)) - - # add example (if present) - if self.options.use_examples and entry.examples: - type_schema["examples"] = entry.examples - - return type_schema - - def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> tuple[Schema, dict[str, Schema]]: - """ - Returns the JSON schema associated with a type and any nested types. - - :param data_type: The type whose JSON schema to return. - :param force_expand: True if a full JSON schema is to be returned even for well-known types; false if a schema - reference is to be used for well-known types. - :returns: A tuple of the JSON schema, and a mapping between nested type names and their corresponding schema. - """ - - if not is_type_like(data_type): - raise TypeError(f"expected a type-like object but got: {data_type}") - - self.types_used = {} - try: - type_schema = self.type_to_schema(data_type, force_expand=force_expand) - - types_defined: dict[str, Schema] = {} - while len(self.types_used) > len(types_defined): - # make a snapshot copy; original collection is going to be modified - types_undefined = { - sub_name: sub_type - for sub_name, sub_type in self.types_used.items() - if sub_name not in types_defined - } - - # expand undefined types, which may lead to additional types to be defined - for sub_name, sub_type in types_undefined.items(): - types_defined[sub_name] = self._type_to_schema_with_lookup(sub_type) - - type_definitions = dict(sorted(types_defined.items())) - finally: - self.types_used = {} - - return type_schema, type_definitions - - -class Validator(enum.Enum): - "Defines constants for JSON schema standards." - - Draft7 = jsonschema.Draft7Validator - Draft201909 = jsonschema.Draft201909Validator - Draft202012 = jsonschema.Draft202012Validator - Latest = jsonschema.Draft202012Validator - - -def classdef_to_schema( - data_type: TypeLike, - options: SchemaOptions | None = None, - validator: Validator = Validator.Latest, -) -> Schema: - """ - Returns the JSON schema corresponding to the given type. - - :param data_type: The Python type used to generate the JSON schema - :returns: A JSON object that you can serialize to a JSON string with json.dump or json.dumps - :raises TypeError: Indicates that the generated JSON schema does not validate against the desired meta-schema. 
-    """
-
-    # short-circuit with an error message when passing invalid data
-    if not is_type_like(data_type):
-        raise TypeError(f"expected a type-like object but got: {data_type}")
-
-    generator = JsonSchemaGenerator(options)
-    type_schema, type_definitions = generator.classdef_to_schema(data_type)
-
-    class_schema: Schema = {}
-    if type_definitions:
-        class_schema["definitions"] = typing.cast(JsonType, type_definitions)
-    class_schema.update(type_schema)
-
-    validator_id = validator.value.META_SCHEMA["$id"]
-    try:
-        validator.value.check_schema(class_schema)
-    except jsonschema.exceptions.SchemaError:
-        raise TypeError(f"schema does not validate against meta-schema <{validator_id}>")
-
-    schema = {"$schema": validator_id}
-    schema.update(class_schema)
-    return schema
-
-
-def validate_object(data_type: TypeLike, json_dict: JsonType) -> None:
-    """
-    Validates if the JSON dictionary object conforms to the expected type.
-
-    :param data_type: The type to match against.
-    :param json_dict: A JSON object obtained with `json.load` or `json.loads`.
-    :raises jsonschema.exceptions.ValidationError: Indicates that the JSON object cannot represent the type.
-    """
-
-    schema_dict = classdef_to_schema(data_type)
-    jsonschema.validate(json_dict, schema_dict, format_checker=jsonschema.FormatChecker())
-
-
-def print_schema(data_type: type) -> None:
-    """Pretty-prints the JSON schema corresponding to the type."""
-
-    s = classdef_to_schema(data_type)
-    print(json.dumps(s, indent=4))
-
-
-def get_schema_identifier(data_type: type) -> str | None:
-    if data_type in JsonSchemaGenerator.type_catalog:
-        return JsonSchemaGenerator.type_catalog.get(data_type).identifier
-    else:
-        return None
-
-
-def register_schema(
-    data_type: T,
-    schema: Schema | None = None,
-    name: str | None = None,
-    examples: list[JsonType] | None = None,
-) -> T:
-    """
-    Associates a type with a JSON schema definition.
-
-    :param data_type: The type to associate with a JSON schema.
-    :param schema: The schema to associate the type with. Derived automatically if omitted.
-    :param name: The name used for looking up the type. Determined automatically if omitted.
-    :returns: The input type.
-    """
-
-    JsonSchemaGenerator.type_catalog.add(
-        data_type,
-        schema,
-        name if name is not None else python_type_to_name(data_type),
-        examples,
-    )
-    return data_type
-
-
-@overload
-def json_schema_type(cls: type[T], /) -> type[T]: ...
-
-
-@overload
-def json_schema_type(cls: None, *, schema: Schema | None = None) -> Callable[[type[T]], type[T]]: ...
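The overloads above declare the two call forms that the implementation below dispatches between. A self-contained sketch of the same dual-form decorator pattern, reduced to a plain registry dict (all names here are illustrative, not from the module):

```python
_registry: dict[type, dict] = {}


def schema_type(cls: type | None = None, *, schema: dict | None = None):
    def wrap(inner: type) -> type:
        _registry[inner] = schema if schema is not None else {"title": inner.__name__}
        return inner

    # bare @schema_type receives the class directly; @schema_type(...) receives None
    return wrap if cls is None else wrap(cls)


@schema_type
class A: ...


@schema_type(schema={"type": "object"})
class B: ...


assert _registry[A] == {"title": "A"}
assert _registry[B] == {"type": "object"}
```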
-
-
-def json_schema_type(
    cls: type[T] | None = None,
    *,
    schema: Schema | None = None,
    examples: list[JsonType] | None = None,
) -> type[T] | Callable[[type[T]], type[T]]:
-    """Decorator to add user-defined schema definition to a class."""
-
-    def wrap(cls: type[T]) -> type[T]:
-        return register_schema(cls, schema, examples=examples)
-
-    # see if decorator is used as @json_schema_type or @json_schema_type()
-    if cls is None:
-        # called with parentheses
-        return wrap
-    else:
-        # called as @json_schema_type without parentheses
-        return wrap(cls)
-
-
-register_schema(JsonObject, name="JsonObject")
-register_schema(JsonArray, name="JsonArray")
-
-register_schema(
-    JsonType,
-    name="JsonType",
-    examples=[
-        {
-            "property1": None,
-            "property2": True,
-            "property3": 64,
-            "property4": "string",
-            "property5": ["item"],
-            "property6": {"key": "value"},
-        }
-    ],
-)
-register_schema(
-    StrictJsonType,
-    name="StrictJsonType",
-    examples=[
-        {
-            "property1": True,
-            "property2": 64,
-            "property3": "string",
-            "property4": ["item"],
-            "property5": {"key": "value"},
-        }
-    ],
-)
diff --git a/src/llama_stack_api/strong_typing/serialization.py b/src/llama_stack_api/strong_typing/serialization.py
deleted file mode 100644
index 3e34945ad..000000000
--- a/src/llama_stack_api/strong_typing/serialization.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-"""
-Type-safe data interchange for Python data classes.
-
-:see: https://github.com/hunyadi/strong_typing
-"""
-
-import inspect
-import json
-import sys
-from types import ModuleType
-from typing import Any, TextIO, TypeVar
-
-from .core import JsonType
-from .deserializer import create_deserializer
-from .inspection import TypeLike
-from .serializer import create_serializer
-
-T = TypeVar("T")
-
-
-def object_to_json(obj: Any) -> JsonType:
-    """
-    Converts a Python object to a representation that can be exported to JSON.
-
-    * Fundamental types (e.g. numeric types) are written as is.
-    * Date and time types are serialized in the ISO 8601 format with time zone.
-    * A byte array is written as a string with Base64 encoding.
-    * UUIDs are written as a UUID string.
-    * Enumerations are written as their value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are exported recursively.
-    * Objects with properties (including data class types) are converted to dictionaries of key-value pairs.
-    """
-
-    typ: type = type(obj)
-    generator = create_serializer(typ)
-    return generator.generate(obj)
-
-
-def json_to_object(typ: TypeLike, data: JsonType, *, context: ModuleType | None = None) -> object:
-    """
-    Creates an object from a representation that has been de-serialized from JSON.
-
-    When de-serializing a JSON object into a Python object, the following transformations are applied:
-
-    * Fundamental types are parsed as `bool`, `int`, `float` or `str`.
-    * Date and time types are parsed from the ISO 8601 format with time zone into the corresponding Python type
-      `datetime`, `date` or `time`
-    * A byte array is read from a string with Base64 encoding into a `bytes` instance.
-    * UUIDs are extracted from a UUID string into a `uuid.UUID` instance.
-    * Enumerations are instantiated with a lookup on enumeration value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are parsed recursively.
- * Complex objects with properties (including data class types) are populated from dictionaries of key-value pairs - using reflection (enumerating type annotations). - - :raises TypeError: A de-serializing engine cannot be constructed for the input type. - :raises JsonKeyError: Deserialization for a class or union type has failed because a matching member was not found. - :raises JsonTypeError: Deserialization for data has failed due to a type mismatch. - """ - - # use caller context for evaluating types if no context is supplied - if context is None: - this_frame = inspect.currentframe() - if this_frame is not None: - caller_frame = this_frame.f_back - del this_frame - - if caller_frame is not None: - try: - context = sys.modules[caller_frame.f_globals["__name__"]] - finally: - del caller_frame - - parser = create_deserializer(typ, context) - return parser.parse(data) - - -def json_dump_string(json_object: JsonType) -> str: - "Dump an object as a JSON string with a compact representation." - - return json.dumps(json_object, ensure_ascii=False, check_circular=False, separators=(",", ":")) - - -def json_dump(json_object: JsonType, file: TextIO) -> None: - json.dump( - json_object, - file, - ensure_ascii=False, - check_circular=False, - separators=(",", ":"), - ) - file.write("\n") diff --git a/src/llama_stack_api/strong_typing/serializer.py b/src/llama_stack_api/strong_typing/serializer.py deleted file mode 100644 index 4a12a1f4b..000000000 --- a/src/llama_stack_api/strong_typing/serializer.py +++ /dev/null @@ -1,494 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import abc -import base64 -import datetime -import enum -import functools -import inspect -import ipaddress -import sys -import typing -import uuid -from collections.abc import Callable -from types import FunctionType, MethodType, ModuleType -from typing import ( - Any, - Generic, - Literal, - NamedTuple, - TypeVar, - Union, -) - -from .core import JsonType -from .exception import JsonTypeError, JsonValueError -from .inspection import ( - TypeLike, - enum_value_types, - evaluate_type, - get_class_properties, - get_resolved_hints, - is_dataclass_type, - is_named_tuple_type, - is_reserved_property, - is_type_annotated, - is_type_enum, - unwrap_annotated_type, -) -from .mapping import python_field_to_json_property - -T = TypeVar("T") - - -class Serializer(abc.ABC, Generic[T]): - @abc.abstractmethod - def generate(self, data: T) -> JsonType: ... 
- - -class NoneSerializer(Serializer[None]): - def generate(self, data: None) -> None: - # can be directly represented in JSON - return None - - -class BoolSerializer(Serializer[bool]): - def generate(self, data: bool) -> bool: - # can be directly represented in JSON - return data - - -class IntSerializer(Serializer[int]): - def generate(self, data: int) -> int: - # can be directly represented in JSON - return data - - -class FloatSerializer(Serializer[float]): - def generate(self, data: float) -> float: - # can be directly represented in JSON - return data - - -class StringSerializer(Serializer[str]): - def generate(self, data: str) -> str: - # can be directly represented in JSON - return data - - -class BytesSerializer(Serializer[bytes]): - def generate(self, data: bytes) -> str: - return base64.b64encode(data).decode("ascii") - - -class DateTimeSerializer(Serializer[datetime.datetime]): - def generate(self, obj: datetime.datetime) -> str: - if obj.tzinfo is None: - raise JsonValueError(f"timestamp lacks explicit time zone designator: {obj}") - fmt = obj.isoformat() - if fmt.endswith("+00:00"): - fmt = f"{fmt[:-6]}Z" # Python's isoformat() does not support military time zones like "Zulu" for UTC - return fmt - - -class DateSerializer(Serializer[datetime.date]): - def generate(self, obj: datetime.date) -> str: - return obj.isoformat() - - -class TimeSerializer(Serializer[datetime.time]): - def generate(self, obj: datetime.time) -> str: - return obj.isoformat() - - -class UUIDSerializer(Serializer[uuid.UUID]): - def generate(self, obj: uuid.UUID) -> str: - return str(obj) - - -class IPv4Serializer(Serializer[ipaddress.IPv4Address]): - def generate(self, obj: ipaddress.IPv4Address) -> str: - return str(obj) - - -class IPv6Serializer(Serializer[ipaddress.IPv6Address]): - def generate(self, obj: ipaddress.IPv6Address) -> str: - return str(obj) - - -class EnumSerializer(Serializer[enum.Enum]): - def generate(self, obj: enum.Enum) -> int | str: - value = obj.value - if isinstance(value, int): - return value - return str(value) - - -class UntypedListSerializer(Serializer[list]): - def generate(self, obj: list) -> list[JsonType]: - return [object_to_json(item) for item in obj] - - -class UntypedDictSerializer(Serializer[dict]): - def generate(self, obj: dict) -> dict[str, JsonType]: - if obj and isinstance(next(iter(obj.keys())), enum.Enum): - iterator = ((key.value, object_to_json(value)) for key, value in obj.items()) - else: - iterator = ((str(key), object_to_json(value)) for key, value in obj.items()) - return dict(iterator) - - -class UntypedSetSerializer(Serializer[set]): - def generate(self, obj: set) -> list[JsonType]: - return [object_to_json(item) for item in obj] - - -class UntypedTupleSerializer(Serializer[tuple]): - def generate(self, obj: tuple) -> list[JsonType]: - return [object_to_json(item) for item in obj] - - -class TypedCollectionSerializer(Serializer, Generic[T]): - generator: Serializer[T] - - def __init__(self, item_type: type[T], context: ModuleType | None) -> None: - self.generator = _get_serializer(item_type, context) - - -class TypedListSerializer(TypedCollectionSerializer[T]): - def generate(self, obj: list[T]) -> list[JsonType]: - return [self.generator.generate(item) for item in obj] - - -class TypedStringDictSerializer(TypedCollectionSerializer[T]): - def __init__(self, value_type: type[T], context: ModuleType | None) -> None: - super().__init__(value_type, context) - - def generate(self, obj: dict[str, T]) -> dict[str, JsonType]: - return {key: 
self.generator.generate(value) for key, value in obj.items()} - - -class TypedEnumDictSerializer(TypedCollectionSerializer[T]): - def __init__( - self, - key_type: type[enum.Enum], - value_type: type[T], - context: ModuleType | None, - ) -> None: - super().__init__(value_type, context) - - value_types = enum_value_types(key_type) - if len(value_types) != 1: - raise JsonTypeError( - f"invalid key type, enumerations must have a consistent member value type but several types found: {value_types}" - ) - - value_type = value_types.pop() - if value_type is not str: - raise JsonTypeError("invalid enumeration key type, expected `enum.Enum` with string values") - - def generate(self, obj: dict[enum.Enum, T]) -> dict[str, JsonType]: - return {key.value: self.generator.generate(value) for key, value in obj.items()} - - -class TypedSetSerializer(TypedCollectionSerializer[T]): - def generate(self, obj: set[T]) -> JsonType: - return [self.generator.generate(item) for item in obj] - - -class TypedTupleSerializer(Serializer[tuple]): - item_generators: tuple[Serializer, ...] - - def __init__(self, item_types: tuple[type, ...], context: ModuleType | None) -> None: - self.item_generators = tuple(_get_serializer(item_type, context) for item_type in item_types) - - def generate(self, obj: tuple) -> list[JsonType]: - return [item_generator.generate(item) for item_generator, item in zip(self.item_generators, obj, strict=False)] - - -class CustomSerializer(Serializer): - converter: Callable[[object], JsonType] - - def __init__(self, converter: Callable[[object], JsonType]) -> None: - self.converter = converter - - def generate(self, obj: object) -> JsonType: - return self.converter(obj) - - -class FieldSerializer(Generic[T]): - """ - Serializes a Python object field into a JSON property. - - :param field_name: The name of the field in a Python class to read data from. - :param property_name: The name of the JSON property to write to a JSON `object`. - :param generator: A compatible serializer that can handle the field's type. 
- """ - - field_name: str - property_name: str - generator: Serializer - - def __init__(self, field_name: str, property_name: str, generator: Serializer[T]) -> None: - self.field_name = field_name - self.property_name = property_name - self.generator = generator - - def generate_field(self, obj: object, object_dict: dict[str, JsonType]) -> None: - value = getattr(obj, self.field_name) - if value is not None: - object_dict[self.property_name] = self.generator.generate(value) - - -class TypedClassSerializer(Serializer[T]): - property_generators: list[FieldSerializer] - - def __init__(self, class_type: type[T], context: ModuleType | None) -> None: - self.property_generators = [ - FieldSerializer( - field_name, - python_field_to_json_property(field_name, field_type), - _get_serializer(field_type, context), - ) - for field_name, field_type in get_class_properties(class_type) - ] - - def generate(self, obj: T) -> dict[str, JsonType]: - object_dict: dict[str, JsonType] = {} - for property_generator in self.property_generators: - property_generator.generate_field(obj, object_dict) - - return object_dict - - -class TypedNamedTupleSerializer(TypedClassSerializer[NamedTuple]): - def __init__(self, class_type: type[NamedTuple], context: ModuleType | None) -> None: - super().__init__(class_type, context) - - -class DataclassSerializer(TypedClassSerializer[T]): - def __init__(self, class_type: type[T], context: ModuleType | None) -> None: - super().__init__(class_type, context) - - -class UnionSerializer(Serializer): - def generate(self, obj: Any) -> JsonType: - return object_to_json(obj) - - -class LiteralSerializer(Serializer): - generator: Serializer - - def __init__(self, values: tuple[Any, ...], context: ModuleType | None) -> None: - literal_type_tuple = tuple(type(value) for value in values) - literal_type_set = set(literal_type_tuple) - if len(literal_type_set) != 1: - value_names = ", ".join(repr(value) for value in values) - raise TypeError( - f"type `Literal[{value_names}]` expects consistent literal value types but got: {literal_type_tuple}" - ) - - literal_type = literal_type_set.pop() - self.generator = _get_serializer(literal_type, context) - - def generate(self, obj: Any) -> JsonType: - return self.generator.generate(obj) - - -class UntypedNamedTupleSerializer(Serializer): - fields: dict[str, str] - - def __init__(self, class_type: type[NamedTuple]) -> None: - # named tuples are also instances of tuple - self.fields = {} - field_names: tuple[str, ...] = class_type._fields - for field_name in field_names: - self.fields[field_name] = python_field_to_json_property(field_name) - - def generate(self, obj: NamedTuple) -> JsonType: - object_dict = {} - for field_name, property_name in self.fields.items(): - value = getattr(obj, field_name) - object_dict[property_name] = object_to_json(value) - - return object_dict - - -class UntypedClassSerializer(Serializer): - def generate(self, obj: object) -> JsonType: - # iterate over object attributes to get a standard representation - object_dict = {} - for name in dir(obj): - if is_reserved_property(name): - continue - - value = getattr(obj, name) - if value is None: - continue - - # filter instance methods - if inspect.ismethod(value): - continue - - object_dict[python_field_to_json_property(name)] = object_to_json(value) - - return object_dict - - -def create_serializer(typ: TypeLike, context: ModuleType | None = None) -> Serializer: - """ - Creates a serializer engine to produce an object that can be directly converted into a JSON string. 
- - When serializing a Python object into a JSON object, the following transformations are applied: - - * Fundamental types (`bool`, `int`, `float` or `str`) are returned as-is. - * Date and time types (`datetime`, `date` or `time`) produce an ISO 8601 format string with time zone - (ending with `Z` for UTC). - * Byte arrays (`bytes`) are written as a string with Base64 encoding. - * UUIDs (`uuid.UUID`) are written as a UUID string as per RFC 4122. - * Enumerations yield their enumeration value. - * Containers (e.g. `list`, `dict`, `set`, `tuple`) are processed recursively. - * Complex objects with properties (including data class types) generate dictionaries of key-value pairs. - - :raises TypeError: A serializer engine cannot be constructed for the input type. - """ - - if context is None: - if isinstance(typ, type): - context = sys.modules[typ.__module__] - - return _get_serializer(typ, context) - - -def _get_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer: - if isinstance(typ, (str, typing.ForwardRef)): - if context is None: - raise TypeError(f"missing context for evaluating type: {typ}") - - typ = evaluate_type(typ, context) - - if isinstance(typ, type): - return _fetch_serializer(typ) - else: - # special forms are not always hashable - return _create_serializer(typ, context) - - -@functools.cache -def _fetch_serializer(typ: type) -> Serializer: - context = sys.modules[typ.__module__] - return _create_serializer(typ, context) - - -def _create_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer: - # check for well-known types - if typ is type(None): - return NoneSerializer() - elif typ is bool: - return BoolSerializer() - elif typ is int: - return IntSerializer() - elif typ is float: - return FloatSerializer() - elif typ is str: - return StringSerializer() - elif typ is bytes: - return BytesSerializer() - elif typ is datetime.datetime: - return DateTimeSerializer() - elif typ is datetime.date: - return DateSerializer() - elif typ is datetime.time: - return TimeSerializer() - elif typ is uuid.UUID: - return UUIDSerializer() - elif typ is ipaddress.IPv4Address: - return IPv4Serializer() - elif typ is ipaddress.IPv6Address: - return IPv6Serializer() - - # dynamically-typed collection types - if typ is list: - return UntypedListSerializer() - elif typ is dict: - return UntypedDictSerializer() - elif typ is set: - return UntypedSetSerializer() - elif typ is tuple: - return UntypedTupleSerializer() - - # generic types (e.g. list, dict, set, etc.) 
-    origin_type = typing.get_origin(typ)
-    if origin_type is list:
-        (list_item_type,) = typing.get_args(typ)  # unpack single tuple element
-        return TypedListSerializer(list_item_type, context)
-    elif origin_type is dict:
-        key_type, value_type = typing.get_args(typ)
-        if key_type is str:
-            return TypedStringDictSerializer(value_type, context)
-        elif issubclass(key_type, enum.Enum):
-            return TypedEnumDictSerializer(key_type, value_type, context)
-    elif origin_type is set:
-        (set_member_type,) = typing.get_args(typ)  # unpack single tuple element
-        return TypedSetSerializer(set_member_type, context)
-    elif origin_type is tuple:
-        return TypedTupleSerializer(typing.get_args(typ), context)
-    elif origin_type is Union:
-        return UnionSerializer()
-    elif origin_type is Literal:
-        return LiteralSerializer(typing.get_args(typ), context)
-
-    if is_type_annotated(typ):
-        return create_serializer(unwrap_annotated_type(typ))
-
-    # check if object has custom serialization method
-    convert_func = getattr(typ, "to_json", None)
-    if callable(convert_func):
-        return CustomSerializer(convert_func)
-
-    if is_type_enum(typ):
-        return EnumSerializer()
-    if is_dataclass_type(typ):
-        return DataclassSerializer(typ, context)
-    if is_named_tuple_type(typ):
-        if getattr(typ, "__annotations__", None):
-            return TypedNamedTupleSerializer(typ, context)
-        else:
-            return UntypedNamedTupleSerializer(typ)
-
-    # fail early if caller passes an object with an exotic type
-    if not isinstance(typ, type) or typ is FunctionType or typ is MethodType or typ is type or typ is ModuleType:
-        raise TypeError(f"object of type {typ} cannot be represented in JSON")
-
-    if get_resolved_hints(typ):
-        return TypedClassSerializer(typ, context)
-    else:
-        return UntypedClassSerializer()
-
-
-def object_to_json(obj: Any) -> JsonType:
-    """
-    Converts a Python object to a representation that can be exported to JSON.
-
-    * Fundamental types (e.g. numeric types) are written as is.
-    * Date and time types are serialized in the ISO 8601 format with time zone.
-    * A byte array is written as a string with Base64 encoding.
-    * UUIDs are written as a UUID string.
-    * Enumerations are written as their value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are exported recursively.
-    * Objects with properties (including data class types) are converted to dictionaries of key-value pairs.
-    """
-
-    typ: type = type(obj)
-    generator = create_serializer(typ)
-    return generator.generate(obj)
diff --git a/src/llama_stack_api/strong_typing/slots.py b/src/llama_stack_api/strong_typing/slots.py
deleted file mode 100644
index 772834140..000000000
--- a/src/llama_stack_api/strong_typing/slots.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any, TypeVar
-
-T = TypeVar("T")
-
-
-class SlotsMeta(type):
-    def __new__(cls: type[T], name: str, bases: tuple[type, ...], ns: dict[str, Any]) -> T:
-        # caller may have already provided slots, in which case just retain them and keep going
-        slots: tuple[str, ...]
= ns.get("__slots__", ()) - - # add fields with type annotations to slots - annotations: dict[str, Any] = ns.get("__annotations__", {}) - members = tuple(member for member in annotations.keys() if member not in slots) - - # assign slots - ns["__slots__"] = slots + tuple(members) - return super().__new__(cls, name, bases, ns) # type: ignore - - -class Slots(metaclass=SlotsMeta): - pass diff --git a/src/llama_stack_api/strong_typing/topological.py b/src/llama_stack_api/strong_typing/topological.py deleted file mode 100644 index 9502a5887..000000000 --- a/src/llama_stack_api/strong_typing/topological.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -from collections.abc import Callable, Iterable -from typing import TypeVar - -from .inspection import TypeCollector - -T = TypeVar("T") - - -def topological_sort(graph: dict[T, set[T]]) -> list[T]: - """ - Performs a topological sort of a graph. - - Nodes with no outgoing edges are first. Nodes with no incoming edges are last. - The topological ordering is not unique. - - :param graph: A dictionary of mappings from nodes to adjacent nodes. Keys and set members must be hashable. - :returns: The list of nodes in topological order. - """ - - # empty list that will contain the sorted nodes (in reverse order) - ordered: list[T] = [] - - seen: dict[T, bool] = {} - - def _visit(n: T) -> None: - status = seen.get(n) - if status is not None: - if status: # node has a permanent mark - return - else: # node has a temporary mark - raise RuntimeError(f"cycle detected in graph for node {n}") - - seen[n] = False # apply temporary mark - for m in graph[n]: # visit all adjacent nodes - if m != n: # ignore self-referencing nodes - _visit(m) - - seen[n] = True # apply permanent mark - ordered.append(n) - - for n in graph.keys(): - _visit(n) - - return ordered - - -def type_topological_sort( - types: Iterable[type], - dependency_fn: Callable[[type], Iterable[type]] | None = None, -) -> list[type]: - """ - Performs a topological sort of a list of types. - - Types that don't depend on other types (i.e. fundamental types) are first. Types on which no other types depend - are last. The topological ordering is not unique. - - :param types: A list of types (simple or composite). - :param dependency_fn: Returns a list of additional dependencies for a class (e.g. classes referenced by a foreign key). - :returns: The list of types in topological order. - """ - - if not all(isinstance(typ, type) for typ in types): - raise TypeError("expected a list of types") - - collector = TypeCollector() - collector.traverse_all(types) - graph = collector.graph - - if dependency_fn: - new_types: set[type] = set() - for source_type, references in graph.items(): - dependent_types = dependency_fn(source_type) - references.update(dependent_types) - new_types.update(dependent_types) - for new_type in new_types: - graph[new_type] = set() - - return topological_sort(graph) diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py index 81c989f88..4dd5d55d2 100644 --- a/src/llama_stack_api/tools.py +++ b/src/llama_stack_api/tools.py @@ -88,6 +88,7 @@ class ToolStore(Protocol): async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... 
+@json_schema_type class ListToolGroupsResponse(BaseModel): """Response containing a list of tool groups. @@ -97,6 +98,7 @@ class ListToolGroupsResponse(BaseModel): data: list[ToolGroup] +@json_schema_type class ListToolDefsResponse(BaseModel): """Response containing a list of tool definitions. diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py index 053e569f4..bfad644cc 100644 --- a/src/llama_stack_api/vector_io.py +++ b/src/llama_stack_api/vector_io.py @@ -15,8 +15,7 @@ from pydantic import BaseModel, Field from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.inference import InterleavedContent -from llama_stack_api.schema_utils import json_schema_type, webmethod -from llama_stack_api.strong_typing.schema import register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod from llama_stack_api.vector_stores import VectorStore from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -738,8 +737,8 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, - include_embeddings: Annotated[bool | None, Query(default=False)] = False, - include_metadata: Annotated[bool | None, Query(default=False)] = False, + include_embeddings: Annotated[bool | None, Query()] = False, + include_metadata: Annotated[bool | None, Query()] = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. diff --git a/tests/unit/server/test_schema_registry.py b/tests/unit/server/test_schema_registry.py new file mode 100644 index 000000000..548b43a29 --- /dev/null +++ b/tests/unit/server/test_schema_registry.py @@ -0,0 +1,48 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from pydantic import BaseModel + +from llama_stack_api import Conversation, SamplingStrategy +from llama_stack_api.schema_utils import ( + clear_dynamic_schema_types, + get_registered_schema_info, + iter_dynamic_schema_types, + iter_json_schema_types, + iter_registered_schema_types, + register_dynamic_schema_type, +) + + +def test_json_schema_registry_contains_known_model() -> None: + assert Conversation in iter_json_schema_types() + + +def test_registered_schema_registry_contains_sampling_strategy() -> None: + registered_names = {info.name for info in iter_registered_schema_types()} + assert "SamplingStrategy" in registered_names + + schema_info = get_registered_schema_info(SamplingStrategy) + assert schema_info is not None + assert schema_info.name == "SamplingStrategy" + + +def test_dynamic_schema_registration_round_trip() -> None: + existing_models = tuple(iter_dynamic_schema_types()) + clear_dynamic_schema_types() + try: + + class TemporaryModel(BaseModel): + foo: str + + register_dynamic_schema_type(TemporaryModel) + assert TemporaryModel in iter_dynamic_schema_types() + + clear_dynamic_schema_types() + assert TemporaryModel not in iter_dynamic_schema_types() + finally: + for model in existing_models: + register_dynamic_schema_type(model) diff --git a/uv.lock b/uv.lock index 8f45f0564..a343eb5d8 100644 --- a/uv.lock +++ b/uv.lock @@ -1834,6 +1834,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/54/c86cd8e011fe98803d7e382fd67c0df5ceab8d2b7ad8c5a81524f791551c/jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716", size = 89184, upload-time = "2025-07-18T15:39:42.956Z" }, ] +[[package]] +name = "jsonschema-path" +version = "0.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pathable" }, + { name = "pyyaml" }, + { name = "referencing" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/45/41ebc679c2a4fced6a722f624c18d658dee42612b83ea24c1caf7c0eb3a8/jsonschema_path-0.3.4.tar.gz", hash = "sha256:8365356039f16cc65fddffafda5f58766e34bebab7d6d105616ab52bc4297001", size = 11159, upload-time = "2025-01-24T14:33:16.547Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/58/3485da8cb93d2f393bce453adeef16896751f14ba3e2024bc21dc9597646/jsonschema_path-0.3.4-py3-none-any.whl", hash = "sha256:f502191fdc2b22050f9a81c9237be9d27145b9001c55842bece5e94e382e52f8", size = 14810, upload-time = "2025-01-24T14:33:14.652Z" }, +] + [[package]] name = "jsonschema-specifications" version = "2025.4.1" @@ -1913,6 +1928,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/43/d9bebfc3db7dea6ec80df5cb2aad8d274dd18ec2edd6c4f21f32c237cbbb/kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5", size = 1941335, upload-time = "2025-06-09T21:57:56.327Z" }, ] +[[package]] +name = "lazy-object-proxy" +version = "1.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/08/a2/69df9c6ba6d316cfd81fe2381e464db3e6de5db45f8c43c6a23504abf8cb/lazy_object_proxy-1.12.0.tar.gz", hash = "sha256:1f5a462d92fd0cfb82f1fab28b51bfb209fabbe6aabf7f0d51472c0c124c0c61", size = 43681, upload-time = "2025-08-22T13:50:06.783Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/1b/b5f5bd6bda26f1e15cd3232b223892e4498e34ec70a7f4f11c401ac969f1/lazy_object_proxy-1.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:8ee0d6027b760a11cc18281e702c0309dd92da458a74b4c15025d7fc490deede", size = 26746, upload-time = "2025-08-22T13:42:37.572Z" }, + { url = "https://files.pythonhosted.org/packages/55/64/314889b618075c2bfc19293ffa9153ce880ac6153aacfd0a52fcabf21a66/lazy_object_proxy-1.12.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4ab2c584e3cc8be0dfca422e05ad30a9abe3555ce63e9ab7a559f62f8dbc6ff9", size = 71457, upload-time = "2025-08-22T13:42:38.743Z" }, + { url = "https://files.pythonhosted.org/packages/11/53/857fc2827fc1e13fbdfc0ba2629a7d2579645a06192d5461809540b78913/lazy_object_proxy-1.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:14e348185adbd03ec17d051e169ec45686dcd840a3779c9d4c10aabe2ca6e1c0", size = 71036, upload-time = "2025-08-22T13:42:40.184Z" }, + { url = "https://files.pythonhosted.org/packages/2b/24/e581ffed864cd33c1b445b5763d617448ebb880f48675fc9de0471a95cbc/lazy_object_proxy-1.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4fcbe74fb85df8ba7825fa05eddca764138da752904b378f0ae5ab33a36c308", size = 69329, upload-time = "2025-08-22T13:42:41.311Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/15f8f5a0b0b2e668e756a152257d26370132c97f2f1943329b08f057eff0/lazy_object_proxy-1.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:563d2ec8e4d4b68ee7848c5ab4d6057a6d703cb7963b342968bb8758dda33a23", size = 70690, upload-time = "2025-08-22T13:42:42.51Z" }, + { url = "https://files.pythonhosted.org/packages/5d/aa/f02be9bbfb270e13ee608c2b28b8771f20a5f64356c6d9317b20043c6129/lazy_object_proxy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:53c7fd99eb156bbb82cbc5d5188891d8fdd805ba6c1e3b92b90092da2a837073", size = 26563, upload-time = "2025-08-22T13:42:43.685Z" }, + { url = "https://files.pythonhosted.org/packages/f4/26/b74c791008841f8ad896c7f293415136c66cc27e7c7577de4ee68040c110/lazy_object_proxy-1.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:86fd61cb2ba249b9f436d789d1356deae69ad3231dc3c0f17293ac535162672e", size = 26745, upload-time = "2025-08-22T13:42:44.982Z" }, + { url = "https://files.pythonhosted.org/packages/9b/52/641870d309e5d1fb1ea7d462a818ca727e43bfa431d8c34b173eb090348c/lazy_object_proxy-1.12.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:81d1852fb30fab81696f93db1b1e55a5d1ff7940838191062f5f56987d5fcc3e", size = 71537, upload-time = "2025-08-22T13:42:46.141Z" }, + { url = "https://files.pythonhosted.org/packages/47/b6/919118e99d51c5e76e8bf5a27df406884921c0acf2c7b8a3b38d847ab3e9/lazy_object_proxy-1.12.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be9045646d83f6c2664c1330904b245ae2371b5c57a3195e4028aedc9f999655", size = 71141, upload-time = "2025-08-22T13:42:47.375Z" }, + { url = "https://files.pythonhosted.org/packages/e5/47/1d20e626567b41de085cf4d4fb3661a56c159feaa73c825917b3b4d4f806/lazy_object_proxy-1.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:67f07ab742f1adfb3966c40f630baaa7902be4222a17941f3d85fd1dae5565ff", size = 69449, upload-time = "2025-08-22T13:42:48.49Z" }, + { url = "https://files.pythonhosted.org/packages/58/8d/25c20ff1a1a8426d9af2d0b6f29f6388005fc8cd10d6ee71f48bff86fdd0/lazy_object_proxy-1.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ba769017b944fcacbf6a80c18b2761a1795b03f8899acdad1f1c39db4409be", size = 70744, upload-time = "2025-08-22T13:42:49.608Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/67/8ec9abe15c4f8a4bcc6e65160a2c667240d025cbb6591b879bea55625263/lazy_object_proxy-1.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:7b22c2bbfb155706b928ac4d74c1a63ac8552a55ba7fff4445155523ea4067e1", size = 26568, upload-time = "2025-08-22T13:42:57.719Z" }, + { url = "https://files.pythonhosted.org/packages/23/12/cd2235463f3469fd6c62d41d92b7f120e8134f76e52421413a0ad16d493e/lazy_object_proxy-1.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4a79b909aa16bde8ae606f06e6bbc9d3219d2e57fb3e0076e17879072b742c65", size = 27391, upload-time = "2025-08-22T13:42:50.62Z" }, + { url = "https://files.pythonhosted.org/packages/60/9e/f1c53e39bbebad2e8609c67d0830cc275f694d0ea23d78e8f6db526c12d3/lazy_object_proxy-1.12.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:338ab2f132276203e404951205fe80c3fd59429b3a724e7b662b2eb539bb1be9", size = 80552, upload-time = "2025-08-22T13:42:51.731Z" }, + { url = "https://files.pythonhosted.org/packages/4c/b6/6c513693448dcb317d9d8c91d91f47addc09553613379e504435b4cc8b3e/lazy_object_proxy-1.12.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8c40b3c9faee2e32bfce0df4ae63f4e73529766893258eca78548bac801c8f66", size = 82857, upload-time = "2025-08-22T13:42:53.225Z" }, + { url = "https://files.pythonhosted.org/packages/12/1c/d9c4aaa4c75da11eb7c22c43d7c90a53b4fca0e27784a5ab207768debea7/lazy_object_proxy-1.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:717484c309df78cedf48396e420fa57fc8a2b1f06ea889df7248fdd156e58847", size = 80833, upload-time = "2025-08-22T13:42:54.391Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ae/29117275aac7d7d78ae4f5a4787f36ff33262499d486ac0bf3e0b97889f6/lazy_object_proxy-1.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a6b7ea5ea1ffe15059eb44bcbcb258f97bcb40e139b88152c40d07b1a1dfc9ac", size = 79516, upload-time = "2025-08-22T13:42:55.812Z" }, + { url = "https://files.pythonhosted.org/packages/19/40/b4e48b2c38c69392ae702ae7afa7b6551e0ca5d38263198b7c79de8b3bdf/lazy_object_proxy-1.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:08c465fb5cd23527512f9bd7b4c7ba6cec33e28aad36fbbe46bf7b858f9f3f7f", size = 27656, upload-time = "2025-08-22T13:42:56.793Z" }, + { url = "https://files.pythonhosted.org/packages/ef/3a/277857b51ae419a1574557c0b12e0d06bf327b758ba94cafc664cb1e2f66/lazy_object_proxy-1.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c9defba70ab943f1df98a656247966d7729da2fe9c2d5d85346464bf320820a3", size = 26582, upload-time = "2025-08-22T13:49:49.366Z" }, + { url = "https://files.pythonhosted.org/packages/1a/b6/c5e0fa43535bb9c87880e0ba037cdb1c50e01850b0831e80eb4f4762f270/lazy_object_proxy-1.12.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6763941dbf97eea6b90f5b06eb4da9418cc088fce0e3883f5816090f9afcde4a", size = 71059, upload-time = "2025-08-22T13:49:50.488Z" }, + { url = "https://files.pythonhosted.org/packages/06/8a/7dcad19c685963c652624702f1a968ff10220b16bfcc442257038216bf55/lazy_object_proxy-1.12.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fdc70d81235fc586b9e3d1aeef7d1553259b62ecaae9db2167a5d2550dcc391a", size = 71034, upload-time = "2025-08-22T13:49:54.224Z" }, + { url = "https://files.pythonhosted.org/packages/12/ac/34cbfb433a10e28c7fd830f91c5a348462ba748413cbb950c7f259e67aa7/lazy_object_proxy-1.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:0a83c6f7a6b2bfc11ef3ed67f8cbe99f8ff500b05655d8e7df9aab993a6abc95", size = 69529, upload-time = "2025-08-22T13:49:55.29Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6a/11ad7e349307c3ca4c0175db7a77d60ce42a41c60bcb11800aabd6a8acb8/lazy_object_proxy-1.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:256262384ebd2a77b023ad02fbcc9326282bcfd16484d5531154b02bc304f4c5", size = 70391, upload-time = "2025-08-22T13:49:56.35Z" }, + { url = "https://files.pythonhosted.org/packages/59/97/9b410ed8fbc6e79c1ee8b13f8777a80137d4bc189caf2c6202358e66192c/lazy_object_proxy-1.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:7601ec171c7e8584f8ff3f4e440aa2eebf93e854f04639263875b8c2971f819f", size = 26988, upload-time = "2025-08-22T13:49:57.302Z" }, +] + [[package]] name = "linkify" version = "1.4" @@ -1992,6 +2039,7 @@ dev = [ { name = "black" }, { name = "mypy" }, { name = "nbval" }, + { name = "openapi-spec-validator" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -2117,6 +2165,7 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-multipart", specifier = ">=0.0.20" }, { name = "pyyaml", specifier = ">=6.0" }, + { name = "pyyaml", specifier = ">=6.0.2" }, { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "starlette" }, @@ -2138,6 +2187,7 @@ dev = [ { name = "black" }, { name = "mypy" }, { name = "nbval" }, + { name = "openapi-spec-validator", specifier = ">=0.7.2" }, { name = "pre-commit", specifier = ">=4.4.0" }, { name = "pytest", specifier = ">=8.4" }, { name = "pytest-asyncio", specifier = ">=1.0" }, @@ -3011,6 +3061,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/f3/ebbd700d8dc1e6380a7a382969d96bc0cbea8717b52fb38ff0ca2a7653e8/openai-2.5.0-py3-none-any.whl", hash = "sha256:21380e5f52a71666dbadbf322dd518bdf2b9d11ed0bb3f96bea17310302d6280", size = 999851, upload-time = "2025-10-17T18:14:45.528Z" }, ] +[[package]] +name = "openapi-schema-validator" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonschema" }, + { name = "jsonschema-specifications" }, + { name = "rfc3339-validator" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/f3/5507ad3325169347cd8ced61c232ff3df70e2b250c49f0fe140edb4973c6/openapi_schema_validator-0.6.3.tar.gz", hash = "sha256:f37bace4fc2a5d96692f4f8b31dc0f8d7400fd04f3a937798eaf880d425de6ee", size = 11550, upload-time = "2025-01-10T18:08:22.268Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/c6/ad0fba32775ae749016829dace42ed80f4407b171da41313d1a3a5f102e4/openapi_schema_validator-0.6.3-py3-none-any.whl", hash = "sha256:f3b9870f4e556b5a62a1c39da72a6b4b16f3ad9c73dc80084b1b11e74ba148a3", size = 8755, upload-time = "2025-01-10T18:08:19.758Z" }, +] + +[[package]] +name = "openapi-spec-validator" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonschema" }, + { name = "jsonschema-path" }, + { name = "lazy-object-proxy" }, + { name = "openapi-schema-validator" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/af/fe2d7618d6eae6fb3a82766a44ed87cd8d6d82b4564ed1c7cfb0f6378e91/openapi_spec_validator-0.7.2.tar.gz", hash = "sha256:cc029309b5c5dbc7859df0372d55e9d1ff43e96d678b9ba087f7c56fc586f734", size = 36855, upload-time = "2025-06-07T14:48:56.299Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/27/dd/b3fd642260cb17532f66cc1e8250f3507d1e580483e209dc1e9d13bd980d/openapi_spec_validator-0.7.2-py3-none-any.whl", hash = "sha256:4bbdc0894ec85f1d1bea1d6d9c8b2c3c8d7ccaa13577ef40da9c006c9fd0eb60", size = 39713, upload-time = "2025-06-07T14:48:54.077Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.36.0" @@ -3247,6 +3326,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650, upload-time = "2024-04-05T09:43:53.299Z" }, ] +[[package]] +name = "pathable" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124, upload-time = "2025-01-10T18:43:13.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592, upload-time = "2025-01-10T18:43:11.88Z" }, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -4404,6 +4492,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/4c/cc276ce57e572c102d9542d383b2cfd551276581dc60004cb94fe8774c11/responses-0.25.8-py3-none-any.whl", hash = "sha256:0c710af92def29c8352ceadff0c3fe340ace27cf5af1bbe46fb71275bcd2831c", size = 34769, upload-time = "2025-08-08T19:01:45.018Z" }, ] +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/ea/a9387748e2d111c3c2b275ba970b735e04e15cdb1eb30693b6b5708c4dbd/rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", size = 5513, upload-time = "2021-05-12T16:37:54.178Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa", size = 3490, upload-time = "2021-05-12T16:37:52.536Z" }, +] + [[package]] name = "rich" version = "14.1.0" @@ -4516,40 +4616,46 @@ wheels = [ [[package]] name = "ruamel-yaml" -version = "0.18.14" +version = "0.18.16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ruamel-yaml-clib", marker = "python_full_version < '3.14' and platform_python_implementation == 'CPython'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/39/87/6da0df742a4684263261c253f00edd5829e6aca970fff69e75028cccc547/ruamel.yaml-0.18.14.tar.gz", hash = "sha256:7227b76aaec364df15936730efbf7d72b30c0b79b1d578bbb8e3dcb2d81f52b7", size = 145511, upload-time = "2025-06-09T08:51:09.828Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/c7/ee630b29e04a672ecfc9b63227c87fd7a37eb67c1bf30fe95376437f897c/ruamel.yaml-0.18.16.tar.gz", hash = "sha256:a6e587512f3c998b2225d68aa1f35111c29fad14aed561a26e73fab729ec5e5a", size = 147269, upload-time = "2025-10-22T17:54:02.346Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/af/6d/6fe4805235e193aad4aaf979160dd1f3c487c57d48b810c816e6e842171b/ruamel.yaml-0.18.14-py3-none-any.whl", hash = "sha256:710ff198bb53da66718c7db27eec4fbcc9aa6ca7204e4c1df2f282b6fe5eb6b2", size = 118570, upload-time = "2025-06-09T08:51:06.348Z" }, + { url = "https://files.pythonhosted.org/packages/0f/73/bb1bc2529f852e7bf64a2dec885e89ff9f5cc7bbf6c9340eed30ff2c69c5/ruamel.yaml-0.18.16-py3-none-any.whl", hash = "sha256:048f26d64245bae57a4f9ef6feb5b552a386830ef7a826f235ffb804c59efbba", size = 119858, upload-time = "2025-10-22T17:53:59.012Z" }, ] [[package]] name = "ruamel-yaml-clib" -version = "0.2.12" +version = "0.2.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315, upload-time = "2024-10-20T10:10:56.22Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/e9/39ec4d4b3f91188fad1842748f67d4e749c77c37e353c4e545052ee8e893/ruamel.yaml.clib-0.2.14.tar.gz", hash = "sha256:803f5044b13602d58ea378576dd75aa759f52116a0232608e8fdada4da33752e", size = 225394, upload-time = "2025-09-22T19:51:23.753Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433, upload-time = "2024-10-20T10:12:55.657Z" }, - { url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362, upload-time = "2024-10-20T10:12:57.155Z" }, - { url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118, upload-time = "2024-10-20T10:12:58.501Z" }, - { url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497, upload-time = "2024-10-20T10:13:00.211Z" }, - { url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042, upload-time = "2024-10-21T11:26:46.038Z" }, - { url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831, upload-time = "2024-10-21T11:26:47.487Z" }, - { url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692, upload-time = "2024-12-11T19:58:17.252Z" }, - { url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777, upload-time = "2024-10-20T10:13:01.395Z" }, - { url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523, upload-time = "2024-10-20T10:13:02.768Z" }, - { url = "https://files.pythonhosted.org/packages/29/00/4864119668d71a5fa45678f380b5923ff410701565821925c69780356ffa/ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a", size = 132011, upload-time = "2024-10-20T10:13:04.377Z" }, - { url = "https://files.pythonhosted.org/packages/7f/5e/212f473a93ae78c669ffa0cb051e3fee1139cb2d385d2ae1653d64281507/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475", size = 642488, upload-time = "2024-10-20T10:13:05.906Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8f/ecfbe2123ade605c49ef769788f79c38ddb1c8fa81e01f4dbf5cf1a44b16/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef", size = 745066, upload-time = "2024-10-20T10:13:07.26Z" }, - { url = "https://files.pythonhosted.org/packages/e2/a9/28f60726d29dfc01b8decdb385de4ced2ced9faeb37a847bd5cf26836815/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6", size = 701785, upload-time = "2024-10-20T10:13:08.504Z" }, - { url = "https://files.pythonhosted.org/packages/84/7e/8e7ec45920daa7f76046578e4f677a3215fe8f18ee30a9cb7627a19d9b4c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf", size = 693017, upload-time = "2024-10-21T11:26:48.866Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b3/d650eaade4ca225f02a648321e1ab835b9d361c60d51150bac49063b83fa/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1", size = 741270, upload-time = "2024-10-21T11:26:50.213Z" }, - { url = "https://files.pythonhosted.org/packages/87/b8/01c29b924dcbbed75cc45b30c30d565d763b9c4d540545a0eeecffb8f09c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01", size = 709059, upload-time = "2024-12-11T19:58:18.846Z" }, - { url = "https://files.pythonhosted.org/packages/30/8c/ed73f047a73638257aa9377ad356bea4d96125b305c34a28766f4445cc0f/ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6", size = 98583, upload-time = "2024-10-20T10:13:09.658Z" }, - { url = "https://files.pythonhosted.org/packages/b0/85/e8e751d8791564dd333d5d9a4eab0a7a115f7e349595417fd50ecae3395c/ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", 
hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3", size = 115190, upload-time = "2024-10-20T10:13:10.66Z" }, + { url = "https://files.pythonhosted.org/packages/b4/42/ccfb34a25289afbbc42017e4d3d4288e61d35b2e00cfc6b92974a6a1f94b/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6aeadc170090ff1889f0d2c3057557f9cd71f975f17535c26a5d37af98f19c27", size = 271775, upload-time = "2025-09-23T14:24:12.771Z" }, + { url = "https://files.pythonhosted.org/packages/82/73/e628a92e80197ff6a79ab81ec3fa00d4cc082d58ab78d3337b7ba7043301/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5e56ac47260c0eed992789fa0b8efe43404a9adb608608631a948cee4fc2b052", size = 138842, upload-time = "2025-09-22T19:50:49.156Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c5/346c7094344a60419764b4b1334d9e0285031c961176ff88ffb652405b0c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a911aa73588d9a8b08d662b9484bc0567949529824a55d3885b77e8dd62a127a", size = 647404, upload-time = "2025-09-22T19:50:52.921Z" }, + { url = "https://files.pythonhosted.org/packages/df/99/65080c863eb06d4498de3d6c86f3e90595e02e159fd8529f1565f56cfe2c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a05ba88adf3d7189a974b2de7a9d56731548d35dc0a822ec3dc669caa7019b29", size = 753141, upload-time = "2025-09-22T19:50:50.294Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e3/0de85f3e3333f8e29e4b10244374a202a87665d1131798946ee22cf05c7c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb04c5650de6668b853623eceadcdb1a9f2fee381f5d7b6bc842ee7c239eeec4", size = 703477, upload-time = "2025-09-22T19:50:51.508Z" }, + { url = "https://files.pythonhosted.org/packages/d9/25/0d2f09d8833c7fd77ab8efeff213093c16856479a9d293180a0d89f6bed9/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:df3ec9959241d07bc261f4983d25a1205ff37703faf42b474f15d54d88b4f8c9", size = 741157, upload-time = "2025-09-23T18:42:50.408Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8c/959f10c2e2153cbdab834c46e6954b6dd9e3b109c8f8c0a3cf1618310985/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fbc08c02e9b147a11dfcaa1ac8a83168b699863493e183f7c0c8b12850b7d259", size = 745859, upload-time = "2025-09-22T19:50:54.497Z" }, + { url = "https://files.pythonhosted.org/packages/ed/6b/e580a7c18b485e1a5f30a32cda96b20364b0ba649d9d2baaf72f8bd21f83/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c099cafc1834d3c5dac305865d04235f7c21c167c8dd31ebc3d6bbc357e2f023", size = 770200, upload-time = "2025-09-22T19:50:55.718Z" }, + { url = "https://files.pythonhosted.org/packages/ef/44/3455eebc761dc8e8fdced90f2b0a3fa61e32ba38b50de4130e2d57db0f21/ruamel.yaml.clib-0.2.14-cp312-cp312-win32.whl", hash = "sha256:b5b0f7e294700b615a3bcf6d28b26e6da94e8eba63b079f4ec92e9ba6c0d6b54", size = 98829, upload-time = "2025-09-22T19:50:58.895Z" }, + { url = "https://files.pythonhosted.org/packages/76/ab/5121f7f3b651db93de546f8c982c241397aad0a4765d793aca1dac5eadee/ruamel.yaml.clib-0.2.14-cp312-cp312-win_amd64.whl", hash = "sha256:a37f40a859b503304dd740686359fcf541d6fb3ff7fc10f539af7f7150917c68", size = 115570, upload-time = "2025-09-22T19:50:57.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/ae/e3811f05415594025e96000349d3400978adaed88d8f98d494352d9761ee/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7e4f9da7e7549946e02a6122dcad00b7c1168513acb1f8a726b1aaf504a99d32", size = 269205, upload-time = "2025-09-23T14:24:15.06Z" }, + { url = "https://files.pythonhosted.org/packages/72/06/7d51f4688d6d72bb72fa74254e1593c4f5ebd0036be5b41fe39315b275e9/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:dd7546c851e59c06197a7c651335755e74aa383a835878ca86d2c650c07a2f85", size = 137417, upload-time = "2025-09-22T19:50:59.82Z" }, + { url = "https://files.pythonhosted.org/packages/5a/08/b4499234a420ef42960eeb05585df5cc7eb25ccb8c980490b079e6367050/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:1c1acc3a0209ea9042cc3cfc0790edd2eddd431a2ec3f8283d081e4d5018571e", size = 642558, upload-time = "2025-09-22T19:51:03.388Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ba/1975a27dedf1c4c33306ee67c948121be8710b19387aada29e2f139c43ee/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2070bf0ad1540d5c77a664de07ebcc45eebd1ddcab71a7a06f26936920692beb", size = 744087, upload-time = "2025-09-22T19:51:00.897Z" }, + { url = "https://files.pythonhosted.org/packages/20/15/8a19a13d27f3bd09fa18813add8380a29115a47b553845f08802959acbce/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd8fe07f49c170e09d76773fb86ad9135e0beee44f36e1576a201b0676d3d1d", size = 699709, upload-time = "2025-09-22T19:51:02.075Z" }, + { url = "https://files.pythonhosted.org/packages/19/ee/8d6146a079ad21e534b5083c9ee4a4c8bec42f79cf87594b60978286b39a/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ff86876889ea478b1381089e55cf9e345707b312beda4986f823e1d95e8c0f59", size = 708926, upload-time = "2025-09-23T18:42:51.707Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/426b714abdc222392e68f3b8ad323930d05a214a27c7e7a0f06c69126401/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1f118b707eece8cf84ecbc3e3ec94d9db879d85ed608f95870d39b2d2efa5dca", size = 740202, upload-time = "2025-09-22T19:51:04.673Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ac/3c5c2b27a183f4fda8a57c82211721c016bcb689a4a175865f7646db9f94/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b30110b29484adc597df6bd92a37b90e63a8c152ca8136aad100a02f8ba6d1b6", size = 765196, upload-time = "2025-09-22T19:51:05.916Z" }, + { url = "https://files.pythonhosted.org/packages/92/2e/06f56a71fd55021c993ed6e848c9b2e5e9cfce180a42179f0ddd28253f7c/ruamel.yaml.clib-0.2.14-cp313-cp313-win32.whl", hash = "sha256:f4e97a1cf0b7a30af9e1d9dad10a5671157b9acee790d9e26996391f49b965a2", size = 98635, upload-time = "2025-09-22T19:51:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/51/79/76aba16a1689b50528224b182f71097ece338e7a4ab55e84c2e73443b78a/ruamel.yaml.clib-0.2.14-cp313-cp313-win_amd64.whl", hash = "sha256:090782b5fb9d98df96509eecdbcaffd037d47389a89492320280d52f91330d78", size = 115238, upload-time = "2025-09-22T19:51:07.081Z" }, + { url = "https://files.pythonhosted.org/packages/21/e2/a59ff65c26aaf21a24eb38df777cb9af5d87ba8fc8107c163c2da9d1e85e/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:7df6f6e9d0e33c7b1d435defb185095386c469109de723d514142632a7b9d07f", size = 271441, upload-time = 
"2025-09-23T14:24:16.498Z" }, + { url = "https://files.pythonhosted.org/packages/6b/fa/3234f913fe9a6525a7b97c6dad1f51e72b917e6872e051a5e2ffd8b16fbb/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:70eda7703b8126f5e52fcf276e6c0f40b0d314674f896fc58c47b0aef2b9ae83", size = 137970, upload-time = "2025-09-22T19:51:09.472Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ec/4edbf17ac2c87fa0845dd366ef8d5852b96eb58fcd65fc1ecf5fe27b4641/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a0cb71ccc6ef9ce36eecb6272c81afdc2f565950cdcec33ae8e6cd8f7fc86f27", size = 739639, upload-time = "2025-09-22T19:51:10.566Z" }, + { url = "https://files.pythonhosted.org/packages/15/18/b0e1fafe59051de9e79cdd431863b03593ecfa8341c110affad7c8121efc/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7cb9ad1d525d40f7d87b6df7c0ff916a66bc52cb61b66ac1b2a16d0c1b07640", size = 764456, upload-time = "2025-09-22T19:51:11.736Z" }, ] [[package]] From 8bf4ee9ab9d2f94ec8b6a9f0cea1e5aa1b893995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Mon, 17 Nov 2025 15:26:10 +0100 Subject: [PATCH 34/62] fix: list-deps command (#4174) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? It was referencing strong_typing which was removed in https://github.com/llamastack/llama-stack/pull/3944 ## Test Plan New CI build test. Signed-off-by: Sébastien Han --- .github/workflows/python-build-test.yml | 1 + src/llama_stack_api/__init__.py | 5 +---- src/llama_stack_api/strong_typing/py.typed | 0 3 files changed, 2 insertions(+), 4 deletions(-) delete mode 100644 src/llama_stack_api/strong_typing/py.typed diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index b58f4eb69..a498ef0a0 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -48,3 +48,4 @@ jobs: command -v llama llama stack list-apis llama stack list-providers inference + llama stack list-deps starter diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index b7efcc543..b6fe2fd23 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -22,7 +22,7 @@ and considered a code smell. All exported symbols are explicitly listed in __all __version__ = "0.4.0.dev0" # Import submodules for those who need them -from . import common, strong_typing # noqa: F401 +from . import common # noqa: F401 # Import all public API symbols from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec @@ -393,8 +393,6 @@ from .shields import ( ShieldInput, Shields, ) - -# Import from strong_typing from .tools import ( ListToolDefsResponse, ListToolGroupsResponse, @@ -449,7 +447,6 @@ from .version import ( __all__ = [ # Submodules "common", - "strong_typing", # Version constants "LLAMA_STACK_API_V1", "LLAMA_STACK_API_V1ALPHA", diff --git a/src/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed deleted file mode 100644 index e69de29bb..000000000 From 5ea1be69fe9d9485de34f70ebd07d7dee52333bd Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 17 Nov 2025 09:28:41 -0500 Subject: [PATCH 35/62] chore: Remove myself from codeowners (#4175) # What does this PR do? 
## Test Plan --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8b17510b7..8fff470f6 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo +* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo From f648cacdad4e4e288247663641a37a7b203f752b Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 17 Nov 2025 11:36:23 -0800 Subject: [PATCH 36/62] fix(openapi): restore embedded request wrappers (#4176) FastAPI generator now only unwraps body params explicitly marked with Body(embed=False) so the /eval run_eval schema once again exposes RunEvalRequest, matching our integration tests and the server's request parsing. Regenerated the OpenAPI specs to capture the restored wrapper. CI on the Stainless preview builds should be green. --- client-sdks/stainless/openapi.yml | 135 +++++++++++++----- docs/static/deprecated-llama-stack-spec.yaml | 133 ++++++++++++----- .../static/experimental-llama-stack-spec.yaml | 10 +- docs/static/stainless-llama-stack-spec.yaml | 135 +++++++++++++----- scripts/openapi_generator/endpoints.py | 65 ++++----- 5 files changed, 332 insertions(+), 146 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index ff86e30e1..3a6735cbc 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -1810,7 +1810,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' + $ref: '#/components/schemas/RegisterScoringFunctionRequest' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: @@ -3300,7 +3300,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequestLoose' + $ref: '#/components/schemas/RegisterDatasetRequest' required: true deprecated: true /v1beta/datasets/{dataset_id}: @@ -3557,7 +3557,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/BenchmarkConfig' + $ref: '#/components/schemas/RunEvalRequest' required: true /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: @@ -10586,6 +10586,14 @@ components: - scores title: EvaluateResponse description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest Job: properties: job_id: @@ -11169,6 +11177,67 @@ components: - $ref: '#/components/schemas/CompletionInputType' title: CompletionInputType title: StringType | ... 
(9 variants) + RegisterScoringFunctionRequest: + properties: + scoring_fn_id: + type: string + title: Scoring Fn Id + description: + type: string + title: Description + return_type: + anyOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + provider_scoring_fn_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest RegisterShieldRequest: properties: shield_id: @@ -11227,6 +11296,31 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + anyOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + dataset_id: + anyOf: + - type: string + - type: 'null' + type: object + required: + - purpose + - source + title: RegisterDatasetRequest RegisterBenchmarkRequest: properties: benchmark_id: @@ -11963,41 +12057,6 @@ components: required: - reasoning_tokens title: OutputTokensDetails - RegisterDatasetRequestLoose: - properties: - purpose: - title: Purpose - source: - title: Source - metadata: - title: Metadata - dataset_id: - title: Dataset Id - type: object - required: - - purpose - - source - title: RegisterDatasetRequestLoose - RegisterScoringFunctionRequestLoose: - properties: - scoring_fn_id: - title: Scoring Fn Id - description: - title: Description - return_type: - title: Return Type - provider_scoring_fn_id: - title: Provider Scoring Fn Id - provider_id: - title: Provider Id - params: - title: Params - type: object - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequestLoose SearchRankingOptions: properties: ranker: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 3bc06d7d7..0bade1866 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -193,7 
+193,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' + $ref: '#/components/schemas/RegisterScoringFunctionRequest' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: @@ -549,7 +549,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequestLoose' + $ref: '#/components/schemas/RegisterDatasetRequest' required: true deprecated: true /v1beta/datasets/{dataset_id}: @@ -7429,6 +7429,14 @@ components: - scores title: EvaluateResponse description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest Job: properties: job_id: @@ -8012,6 +8020,67 @@ components: - $ref: '#/components/schemas/CompletionInputType' title: CompletionInputType title: StringType | ... (9 variants) + RegisterScoringFunctionRequest: + properties: + scoring_fn_id: + type: string + title: Scoring Fn Id + description: + type: string + title: Description + return_type: + anyOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + provider_scoring_fn_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest RegisterShieldRequest: properties: shield_id: @@ -8070,6 +8139,31 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + anyOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + dataset_id: + anyOf: + - type: string + - type: 'null' + type: object + required: + - purpose + - source + title: RegisterDatasetRequest RegisterBenchmarkRequest: properties: benchmark_id: @@ -8806,41 +8900,6 @@ components: required: - reasoning_tokens title: OutputTokensDetails - 
RegisterDatasetRequestLoose: - properties: - purpose: - title: Purpose - source: - title: Source - metadata: - title: Metadata - dataset_id: - title: Dataset Id - type: object - required: - - purpose - - source - title: RegisterDatasetRequestLoose - RegisterScoringFunctionRequestLoose: - properties: - scoring_fn_id: - title: Scoring Fn Id - description: - title: Description - return_type: - title: Return Type - provider_scoring_fn_id: - title: Provider Scoring Fn Id - provider_id: - title: Provider Id - params: - title: Params - type: object - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequestLoose SearchRankingOptions: properties: ranker: diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 2b36ebf47..4271989d6 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -300,7 +300,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/BenchmarkConfig' + $ref: '#/components/schemas/RunEvalRequest' required: true /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: @@ -6711,6 +6711,14 @@ components: - scores title: EvaluateResponse description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest Job: properties: job_id: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index ff86e30e1..3a6735cbc 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1810,7 +1810,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' + $ref: '#/components/schemas/RegisterScoringFunctionRequest' required: true deprecated: true /v1/scoring-functions/{scoring_fn_id}: @@ -3300,7 +3300,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequestLoose' + $ref: '#/components/schemas/RegisterDatasetRequest' required: true deprecated: true /v1beta/datasets/{dataset_id}: @@ -3557,7 +3557,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/BenchmarkConfig' + $ref: '#/components/schemas/RunEvalRequest' required: true /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: @@ -10586,6 +10586,14 @@ components: - scores title: EvaluateResponse description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest Job: properties: job_id: @@ -11169,6 +11177,67 @@ components: - $ref: '#/components/schemas/CompletionInputType' title: CompletionInputType title: StringType | ... 
(9 variants) + RegisterScoringFunctionRequest: + properties: + scoring_fn_id: + type: string + title: Scoring Fn Id + description: + type: string + title: Description + return_type: + anyOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + provider_scoring_fn_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest RegisterShieldRequest: properties: shield_id: @@ -11227,6 +11296,31 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + anyOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + dataset_id: + anyOf: + - type: string + - type: 'null' + type: object + required: + - purpose + - source + title: RegisterDatasetRequest RegisterBenchmarkRequest: properties: benchmark_id: @@ -11963,41 +12057,6 @@ components: required: - reasoning_tokens title: OutputTokensDetails - RegisterDatasetRequestLoose: - properties: - purpose: - title: Purpose - source: - title: Source - metadata: - title: Metadata - dataset_id: - title: Dataset Id - type: object - required: - - purpose - - source - title: RegisterDatasetRequestLoose - RegisterScoringFunctionRequestLoose: - properties: - scoring_fn_id: - title: Scoring Fn Id - description: - title: Description - return_type: - title: Return Type - provider_scoring_fn_id: - title: Provider Scoring Fn Id - provider_id: - title: Provider Id - params: - title: Params - type: object - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequestLoose SearchRankingOptions: properties: ranker: diff --git a/scripts/openapi_generator/endpoints.py b/scripts/openapi_generator/endpoints.py index 39086f47f..85203cb71 100644 --- a/scripts/openapi_generator/endpoints.py +++ b/scripts/openapi_generator/endpoints.py @@ -15,6 +15,7 @@ import typing from 
typing import Annotated, Any, get_args, get_origin from fastapi import FastAPI +from fastapi.params import Body as FastAPIBody from pydantic import Field, create_model from llama_stack.log import get_logger @@ -26,6 +27,8 @@ from .state import _extra_body_fields, register_dynamic_model logger = get_logger(name=__name__, category="core") +type QueryParameter = tuple[str, type, Any, bool] + def _to_pascal_case(segment: str) -> str: tokens = re.findall(r"[A-Za-z]+|\d+", segment) @@ -75,12 +78,12 @@ def _create_endpoint_with_request_model( return endpoint -def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_any: bool = False) -> dict[str, tuple]: +def _build_field_definitions(query_parameters: list[QueryParameter], use_any: bool = False) -> dict[str, tuple]: """Build field definitions for a Pydantic model from query parameters.""" from typing import Any field_definitions = {} - for param_name, param_type, default_value in query_parameters: + for param_name, param_type, default_value, _ in query_parameters: if use_any: field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) continue @@ -108,10 +111,10 @@ def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_ field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) # Ensure all parameters are included - expected_params = {name for name, _, _ in query_parameters} + expected_params = {name for name, _, _, _ in query_parameters} missing = expected_params - set(field_definitions.keys()) if missing: - for param_name, _, default_value in query_parameters: + for param_name, _, default_value, _ in query_parameters: if param_name in missing: field_definitions[param_name] = ( Any, @@ -126,7 +129,7 @@ def _create_dynamic_request_model( webmethod, method_name: str, http_method: str, - query_parameters: list[tuple[str, type, Any]], + query_parameters: list[QueryParameter], use_any: bool = False, variant_suffix: str | None = None, ) -> type | None: @@ -143,12 +146,12 @@ def _create_dynamic_request_model( def _build_signature_params( - query_parameters: list[tuple[str, type, Any]], + query_parameters: list[QueryParameter], ) -> tuple[list[inspect.Parameter], dict[str, type]]: """Build signature parameters and annotations from query parameters.""" signature_params = [] param_annotations = {} - for param_name, param_type, default_value in query_parameters: + for param_name, param_type, default_value, _ in query_parameters: param_annotations[param_name] = param_type signature_params.append( inspect.Parameter( @@ -219,6 +222,19 @@ def _is_extra_body_field(metadata_item: Any) -> bool: return isinstance(metadata_item, ExtraBodyField) +def _should_embed_parameter(param_type: Any) -> bool: + """Determine whether a parameter should be embedded (wrapped) in the request body.""" + if get_origin(param_type) is Annotated: + args = get_args(param_type) + metadata = args[1:] if len(args) > 1 else [] + for metadata_item in metadata: + if isinstance(metadata_item, FastAPIBody): + # FastAPI treats embed=None as False, so default to False when unset. + return bool(metadata_item.embed) + # Unannotated parameters default to embed=True through create_dynamic_typed_route. 
+ return True + + def _is_async_iterator_type(type_obj: Any) -> bool: """Check if a type is AsyncIterator or AsyncIterable.""" from collections.abc import AsyncIterable, AsyncIterator @@ -282,7 +298,7 @@ def _find_models_for_endpoint( Returns: tuple: (request_model, response_model, query_parameters, file_form_params, streaming_response_model, response_schema_name) - where query_parameters is a list of (name, type, default_value) tuples + where query_parameters is a list of (name, type, default_value, should_embed) tuples and file_form_params is a list of inspect.Parameter objects for File()/Form() params and streaming_response_model is the model for streaming responses (AsyncIterator content) """ @@ -299,7 +315,7 @@ def _find_models_for_endpoint( # Find request model and collect all body parameters request_model = None - query_parameters = [] + query_parameters: list[QueryParameter] = [] file_form_params = [] path_params = set() extra_body_params = [] @@ -325,6 +341,7 @@ def _find_models_for_endpoint( # Check if it's a File() or Form() parameter - these need special handling param_type = param.annotation + param_should_embed = _should_embed_parameter(param_type) if _is_file_or_form_param(param_type): # File() and Form() parameters must be in the function signature directly # They cannot be part of a Pydantic model @@ -350,30 +367,14 @@ def _find_models_for_endpoint( # Store as extra body parameter - exclude from request model extra_body_params.append((param_name, base_type, extra_body_description)) continue + param_type = base_type # Check if it's a Pydantic model (for POST/PUT requests) if hasattr(param_type, "model_json_schema"): - # Collect all body parameters including Pydantic models - # We'll decide later whether to use a single model or create a combined one - query_parameters.append((param_name, param_type, param.default)) - elif get_origin(param_type) is Annotated: - # Handle Annotated types - get the base type - args = get_args(param_type) - if args and hasattr(args[0], "model_json_schema"): - # Collect Pydantic models from Annotated types - query_parameters.append((param_name, args[0], param.default)) - else: - # Regular annotated parameter (but not File/Form, already handled above) - query_parameters.append((param_name, param_type, param.default)) + query_parameters.append((param_name, param_type, param.default, param_should_embed)) else: - # This is likely a body parameter for POST/PUT or query parameter for GET - # Store the parameter info for later use - # Preserve inspect.Parameter.empty to distinguish "no default" from "default=None" - default_value = param.default - - # Extract the base type from union types (e.g., str | None -> str) - # Also make it safe for FastAPI to avoid forward reference issues - query_parameters.append((param_name, param_type, default_value)) + # Regular annotated parameter (but not File/Form, already handled above) + query_parameters.append((param_name, param_type, param.default, param_should_embed)) # Store extra body fields for later use in post-processing # We'll store them when the endpoint is created, as we need the full path @@ -385,8 +386,8 @@ def _find_models_for_endpoint( # Otherwise, we'll create a combined request model from all parameters # BUT: For GET requests, never create a request body - all parameters should be query parameters if is_post_put and len(query_parameters) == 1: - param_name, param_type, default_value = query_parameters[0] - if hasattr(param_type, "model_json_schema"): + param_name, param_type, default_value, 
should_embed = query_parameters[0] + if hasattr(param_type, "model_json_schema") and not should_embed: request_model = param_type query_parameters = [] # Clear query_parameters so we use the single model @@ -495,7 +496,7 @@ def _create_fastapi_endpoint(app: FastAPI, route, webmethod, api: Api): if file_form_params and is_post_put: signature_params = list(file_form_params) param_annotations = {param.name: param.annotation for param in file_form_params} - for param_name, param_type, default_value in query_parameters: + for param_name, param_type, default_value, _ in query_parameters: signature_params.append( inspect.Parameter( param_name, From 0128effbf7ac71215c557b73f5ab6396731919ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Mon, 17 Nov 2025 21:09:02 +0100 Subject: [PATCH 37/62] chore: remove pyyaml and starlette duplication in pyproject (#4172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sébastien Han Co-authored-by: Ashwin Bharambe --- pyproject.toml | 2 -- uv.lock | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bdf8309ad..eea515b09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,6 @@ dependencies = [ "pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support. "pydantic>=2.11.9", "rich", - "starlette", "termcolor", "tiktoken", "pillow", @@ -50,7 +49,6 @@ dependencies = [ "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations - "pyyaml>=6.0.2", "starlette>=0.49.1", ] diff --git a/uv.lock b/uv.lock index a343eb5d8..8c648c362 100644 --- a/uv.lock +++ b/uv.lock @@ -2165,10 +2165,8 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-multipart", specifier = ">=0.0.20" }, { name = "pyyaml", specifier = ">=6.0" }, - { name = "pyyaml", specifier = ">=6.0.2" }, { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, - { name = "starlette" }, { name = "starlette", specifier = ">=0.49.1" }, { name = "termcolor" }, { name = "tiktoken" }, @@ -4656,6 +4654,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6b/fa/3234f913fe9a6525a7b97c6dad1f51e72b917e6872e051a5e2ffd8b16fbb/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:70eda7703b8126f5e52fcf276e6c0f40b0d314674f896fc58c47b0aef2b9ae83", size = 137970, upload-time = "2025-09-22T19:51:09.472Z" }, { url = "https://files.pythonhosted.org/packages/ef/ec/4edbf17ac2c87fa0845dd366ef8d5852b96eb58fcd65fc1ecf5fe27b4641/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a0cb71ccc6ef9ce36eecb6272c81afdc2f565950cdcec33ae8e6cd8f7fc86f27", size = 739639, upload-time = "2025-09-22T19:51:10.566Z" }, { url = "https://files.pythonhosted.org/packages/15/18/b0e1fafe59051de9e79cdd431863b03593ecfa8341c110affad7c8121efc/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7cb9ad1d525d40f7d87b6df7c0ff916a66bc52cb61b66ac1b2a16d0c1b07640", size = 764456, upload-time = "2025-09-22T19:51:11.736Z" }, + { url = "https://files.pythonhosted.org/packages/e7/cd/150fdb96b8fab27fe08d8a59fe67554568727981806e6bc2677a16081ec7/ruamel_yaml_clib-0.2.14-cp314-cp314-win32.whl", hash = "sha256:9b4104bf43ca0cd4e6f738cb86326a3b2f6eef00f417bd1e7efb7bdffe74c539", size = 102394, upload-time = "2025-11-14T21:57:36.703Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/e6/a3fa40084558c7e1dc9546385f22a93949c890a8b2e445b2ba43935f51da/ruamel_yaml_clib-0.2.14-cp314-cp314-win_amd64.whl", hash = "sha256:13997d7d354a9890ea1ec5937a219817464e5cc344805b37671562a401ca3008", size = 122673, upload-time = "2025-11-14T21:57:38.177Z" }, ] [[package]] From fe91d331efb98193ebed6208a6f9ecaa52cbceda Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Mon, 17 Nov 2025 12:16:35 -0800 Subject: [PATCH 38/62] fix: Remove authorization from provider data (#4161) # What does this PR do? - Remove backward compatibility for authorization in mcp_headers - Enforce authorization must use dedicated parameter - Add validation error if Authorization found in provider_data headers - Update test_mcp.py to use authorization parameter - Update test_mcp_json_schema.py to use authorization parameter - Update test_tools_with_schemas.py to use authorization parameter - Update documentation to show the change in the authorization approach Breaking Change: - Authorization can no longer be passed via mcp_headers in provider_data - Users must use the dedicated 'authorization' parameter instead - Clear error message guides users to the new approach" ## Test Plan CI --------- Co-authored-by: Omar Abdelwahab Co-authored-by: Ashwin Bharambe --- docs/docs/building_applications/tools.mdx | 22 ++-- .../model_context_protocol.py | 60 +++++----- .../inference/test_tools_with_schemas.py | 17 +-- tests/integration/tool_runtime/test_mcp.py | 28 ++--- .../tool_runtime/test_mcp_json_schema.py | 104 +++--------------- 5 files changed, 60 insertions(+), 171 deletions(-) diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index 3b78ec57b..f7b913fef 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -104,23 +104,19 @@ client.toolgroups.register( ) ``` -Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide authorization headers to send to the MCP server using the "Provider Data" abstraction provided by Llama Stack. When making an agent call, +Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide the authorization token when creating the Agent: ```python agent = Agent( ..., - tools=["mcp::deepwiki"], - extra_headers={ - "X-LlamaStack-Provider-Data": json.dumps( - { - "mcp_headers": { - "http://mcp.deepwiki.com/sse": { - "Authorization": "Bearer ", - }, - }, - } - ), - }, + tools=[ + { + "type": "mcp", + "server_url": "https://mcp.deepwiki.com/sse", + "server_label": "mcp::deepwiki", + "authorization": "", # OAuth token (without "Bearer " prefix) + } + ], ) agent.create_turn(...) 
``` diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 649bddecb..97b044dbf 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -48,16 +48,10 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime if mcp_endpoint is None: raise ValueError("mcp_endpoint is required") - # Phase 1: Support both old header-based auth AND new authorization parameter - # Get headers and auth from provider data (old approach) - provider_headers, provider_auth = await self.get_headers_from_request(mcp_endpoint.uri) + # Get other headers from provider data (but NOT authorization) + provider_headers = await self.get_headers_from_request(mcp_endpoint.uri) - # New authorization parameter takes precedence over provider data - final_authorization = authorization or provider_auth - - return await list_mcp_tools( - endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=final_authorization - ) + return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization) async def invoke_tool( self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None @@ -69,39 +63,38 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime if urlparse(endpoint).scheme not in ("http", "https"): raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") - # Phase 1: Support both old header-based auth AND new authorization parameter - # Get headers and auth from provider data (old approach) - provider_headers, provider_auth = await self.get_headers_from_request(endpoint) - - # New authorization parameter takes precedence over provider data - final_authorization = authorization or provider_auth + # Get other headers from provider data (but NOT authorization) + provider_headers = await self.get_headers_from_request(endpoint) return await invoke_mcp_tool( endpoint=endpoint, tool_name=tool_name, kwargs=kwargs, headers=provider_headers, - authorization=final_authorization, + authorization=authorization, ) - async def get_headers_from_request(self, mcp_endpoint_uri: str) -> tuple[dict[str, str], str | None]: + async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: """ - Extract headers and authorization from request provider data (Phase 1 backward compatibility). + Extract headers from request provider data, excluding authorization. - Phase 1: Temporarily allows Authorization to be passed via mcp_headers for backward compatibility. - Phase 2: Will enforce that Authorization should use the dedicated authorization parameter instead. + Authorization must be provided via the dedicated authorization parameter. + If Authorization is found in mcp_headers, raise an error to guide users to the correct approach. 
+ + Args: + mcp_endpoint_uri: The MCP endpoint URI to match against provider data Returns: - Tuple of (headers_dict, authorization_token) - - headers_dict: All headers except Authorization - - authorization_token: Token from Authorization header (with "Bearer " prefix removed), or None + dict[str, str]: Headers dictionary (without Authorization) + + Raises: + ValueError: If Authorization header is found in mcp_headers """ def canonicalize_uri(uri: str) -> str: return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" headers = {} - authorization = None provider_data = self.get_request_provider_data() if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers: @@ -109,17 +102,14 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): continue - # Phase 1: Extract Authorization from mcp_headers for backward compatibility - # (Phase 2 will reject this and require the dedicated authorization parameter) + # Reject Authorization in mcp_headers - must use authorization parameter for key in values.keys(): if key.lower() == "authorization": - # Extract authorization token and strip "Bearer " prefix if present - auth_value = values[key] - if auth_value.startswith("Bearer "): - authorization = auth_value[7:] # Remove "Bearer " prefix - else: - authorization = auth_value - else: - headers[key] = values[key] + raise ValueError( + "Authorization cannot be provided via mcp_headers in provider_data. " + "Please use the dedicated 'authorization' parameter instead. " + "Example: tool_runtime.invoke_tool(..., authorization='your-token')" + ) + headers[key] = values[key] - return headers, authorization + return headers diff --git a/tests/integration/inference/test_tools_with_schemas.py b/tests/integration/inference/test_tools_with_schemas.py index 5b6e69ae3..ab033c381 100644 --- a/tests/integration/inference/test_tools_with_schemas.py +++ b/tests/integration/inference/test_tools_with_schemas.py @@ -9,8 +9,6 @@ Integration tests for inference/chat completion with JSON Schema-based tools. Tests that tools pass through correctly to various LLM providers. """ -import json - import pytest from llama_stack.core.library_client import LlamaStackAsLibraryClient @@ -193,22 +191,11 @@ class TestMCPToolsInChatCompletion: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - + # Use the dedicated authorization parameter # Get the tools from MCP tools_response = llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Convert to OpenAI format for inference diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py index 1b7f509d2..074a92afb 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -4,8 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json - import pytest from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.turn_events import StepCompleted, StepProgress, ToolCallIssuedDelta @@ -37,32 +35,20 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - - with pytest.raises(Exception, match="Unauthorized"): - llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id) - - tools_list = llama_stack_client.tools.list( - toolgroup_id=test_toolgroup_id, - extra_headers=auth_headers, # Use old header-based approach + # Use the dedicated authorization parameter (no more provider_data headers) + # This tests direct tool_runtime.invoke_tool API calls + tools_list = llama_stack_client.tool_runtime.list_tools( + tool_group_id=test_toolgroup_id, + authorization=AUTH_TOKEN, # Use dedicated authorization parameter ) assert len(tools_list) == 2 assert {t.name for t in tools_list} == {"greet_everyone", "get_boiling_point"} + # Invoke tool with authorization parameter response = llama_stack_client.tool_runtime.invoke_tool( tool_name="greet_everyone", kwargs=dict(url="https://www.google.com"), - extra_headers=auth_headers, # Use old header-based approach + authorization=AUTH_TOKEN, # Use dedicated authorization parameter ) content = response.content assert len(content) == 1 diff --git a/tests/integration/tool_runtime/test_mcp_json_schema.py b/tests/integration/tool_runtime/test_mcp_json_schema.py index 719588c7f..6be71caaf 100644 --- a/tests/integration/tool_runtime/test_mcp_json_schema.py +++ b/tests/integration/tool_runtime/test_mcp_json_schema.py @@ -8,8 +8,6 @@ Tests $ref, $defs, and other JSON Schema features through MCP integration. 
""" -import json - import pytest from llama_stack.core.library_client import LlamaStackAsLibraryClient @@ -122,22 +120,11 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - + # Use the dedicated authorization parameter # List runtime tools response = llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) tools = response @@ -173,22 +160,11 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - + # Use the dedicated authorization parameter # List tools response = llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Find book_flight tool (which should have $ref/$defs) @@ -230,21 +206,10 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - + # Use the dedicated authorization parameter response = llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Find get_weather tool @@ -284,22 +249,10 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - - # List tools to populate the tool index + # Use the dedicated authorization parameter llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Invoke tool with complex nested data @@ -311,7 +264,7 @@ class TestMCPToolInvocation: "shipping": {"address": {"street": "123 Main St", "city": "San Francisco", "zipcode": "94102"}}, } }, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Should succeed without schema validation errors @@ -337,29 +290,17 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - - # List tools to populate the tool index + # Use the dedicated authorization parameter llama_stack_client.tool_runtime.list_tools( tool_group_id=test_toolgroup_id, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) # Test with email format result_email = llama_stack_client.tool_runtime.invoke_tool( tool_name="flexible_contact", kwargs={"contact_info": "user@example.com"}, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) assert result_email.error_message is None @@ -368,7 
+309,7 @@ class TestMCPToolInvocation: result_phone = llama_stack_client.tool_runtime.invoke_tool( tool_name="flexible_contact", kwargs={"contact_info": "+15551234567"}, - extra_headers=auth_headers, + authorization=AUTH_TOKEN, ) assert result_phone.error_message is None @@ -400,21 +341,10 @@ class TestAgentWithMCPTools: mcp_endpoint=dict(uri=uri), ) - # Use old header-based approach for Phase 1 (backward compatibility) - provider_data = { - "mcp_headers": { - uri: { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, - }, - } - auth_headers = { - "X-LlamaStack-Provider-Data": json.dumps(provider_data), - } - - tools_list = llama_stack_client.tools.list( - toolgroup_id=test_toolgroup_id, - extra_headers=auth_headers, + # Use the dedicated authorization parameter + tools_list = llama_stack_client.tool_runtime.list_tools( + tool_group_id=test_toolgroup_id, + authorization=AUTH_TOKEN, ) tool_defs = [ { From 5fe6098350647ca1f62ba9f93bbaa80145a8c370 Mon Sep 17 00:00:00 2001 From: Theofanis Petkos Date: Mon, 17 Nov 2025 20:35:28 +0000 Subject: [PATCH 39/62] docs: Improvements on `provider_codegen` for type hints and multi-line yaml descriptions (#4033) # What does this PR do? This PR improves type hint cleanup in auto-generated provider documentation by adding regex logic. **Issues Fixed:** - Type hints with missing closing brackets (e.g., `list[str` instead of `list[str]`) - Types showing as `<class 'bool'>`, `<class 'str'>` instead of `bool`, `str` - The multi-line YAML frontmatter in index documentation files wasn't ideal, so we now add the proper `|` character. **Changes:** 1. Replaced string replacement (`.replace`) with regex-based type cleaning to preserve the trailing bracket in the case of `list` and `dict`. 2. Added the `|` character for multi-line YAML descriptions. 3. I have regenerated the docs. However, let me know if that's not needed. ## Test Plan 1. Ran `uv run python scripts/provider_codegen.py` - successfully regenerated all docs 2. We can see that the updated docs handle type hint cleanup correctly, and multi-line YAML descriptions now include the `|` character. ### Note to the reviewer(s) This is my first contribution to your lovely repo! Initially I was going through the docs (I wanted to use `remote::gemini` as a provider) and realized the issue. I've read the [CONTRIBUTING.md](https://github.com/llamastack/llama-stack/blob/main/CONTRIBUTING.md) and decided to open the PR. Let me know if there's anything I did wrong and I'll update my PR!
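For illustration, here is a minimal sketch of the kind of regex-based type cleaning described above (the helper name `_clean_type_hint` and the exact patterns are assumptions for this example, not the actual code in `scripts/provider_codegen.py`):

```python
import re

def _clean_type_hint(type_str: str) -> str:
    """Hypothetical sketch of regex-based type-hint cleanup."""
    # Collapse reprs like "<class 'bool'>" down to the bare name "bool".
    type_str = re.sub(r"<class '([^']+)'>", r"\1", type_str)
    # Strip dotted module prefixes while preserving generic brackets, so
    # "list[a.b.KVStoreReference]" becomes "list[KVStoreReference]" rather
    # than losing its closing "]" as naive string slicing did.
    return re.sub(r"\b(?:\w+\.)+(\w+)", r"\1", type_str)

assert _clean_type_hint("<class 'bool'>") == "bool"
assert _clean_type_hint("list[str]") == "list[str]"
```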
--------- Signed-off-by: thepetk Co-authored-by: Ashwin Bharambe --- docs/docs/providers/agents/index.mdx | 5 +- .../agents/inline_meta-reference.mdx | 2 +- docs/docs/providers/batches/index.mdx | 15 ++--- .../providers/batches/inline_reference.mdx | 6 +- .../providers/datasetio/inline_localfs.mdx | 2 +- .../datasetio/remote_huggingface.mdx | 2 +- .../providers/datasetio/remote_nvidia.mdx | 2 +- docs/docs/providers/eval/index.mdx | 5 +- .../providers/eval/inline_meta-reference.mdx | 2 +- docs/docs/providers/eval/remote_nvidia.mdx | 2 +- docs/docs/providers/files/index.mdx | 5 +- docs/docs/providers/files/inline_localfs.mdx | 6 +- docs/docs/providers/files/remote_openai.mdx | 4 +- docs/docs/providers/files/remote_s3.mdx | 8 +-- docs/docs/providers/inference/index.mdx | 13 +++-- .../inference/inline_meta-reference.mdx | 8 +-- .../providers/inference/remote_anthropic.mdx | 6 +- .../docs/providers/inference/remote_azure.mdx | 8 +-- .../providers/inference/remote_bedrock.mdx | 8 +-- .../providers/inference/remote_cerebras.mdx | 8 +-- .../providers/inference/remote_databricks.mdx | 6 +- .../providers/inference/remote_fireworks.mdx | 8 +-- .../providers/inference/remote_gemini.mdx | 6 +- docs/docs/providers/inference/remote_groq.mdx | 8 +-- .../inference/remote_hf_endpoint.mdx | 4 +- .../inference/remote_hf_serverless.mdx | 4 +- .../inference/remote_llama-openai-compat.mdx | 8 +-- .../providers/inference/remote_nvidia.mdx | 14 ++--- docs/docs/providers/inference/remote_oci.mdx | 16 ++--- .../providers/inference/remote_ollama.mdx | 6 +- .../providers/inference/remote_openai.mdx | 8 +-- .../inference/remote_passthrough.mdx | 8 +-- .../providers/inference/remote_runpod.mdx | 6 +- .../providers/inference/remote_sambanova.mdx | 8 +-- docs/docs/providers/inference/remote_tgi.mdx | 6 +- .../providers/inference/remote_together.mdx | 8 +-- .../providers/inference/remote_vertexai.mdx | 8 +-- docs/docs/providers/inference/remote_vllm.mdx | 8 +-- .../providers/inference/remote_watsonx.mdx | 10 ++-- .../post_training/inline_huggingface-gpu.mdx | 34 +++++------ .../post_training/inline_torchtune-cpu.mdx | 2 +- .../post_training/inline_torchtune-gpu.mdx | 2 +- .../providers/post_training/remote_nvidia.mdx | 6 +- docs/docs/providers/safety/index.mdx | 5 +- .../providers/safety/inline_llama-guard.mdx | 2 +- .../providers/safety/inline_prompt-guard.mdx | 2 +- docs/docs/providers/safety/remote_bedrock.mdx | 4 +- docs/docs/providers/safety/remote_nvidia.mdx | 2 +- .../providers/safety/remote_sambanova.mdx | 4 +- .../tool_runtime/remote_bing-search.mdx | 2 +- .../tool_runtime/remote_brave-search.mdx | 2 +- .../tool_runtime/remote_tavily-search.mdx | 2 +- .../providers/vector_io/inline_chromadb.mdx | 4 +- .../docs/providers/vector_io/inline_faiss.mdx | 2 +- .../vector_io/inline_meta-reference.mdx | 2 +- .../providers/vector_io/inline_milvus.mdx | 6 +- .../providers/vector_io/inline_qdrant.mdx | 4 +- .../providers/vector_io/inline_sqlite-vec.mdx | 4 +- .../providers/vector_io/inline_sqlite_vec.mdx | 4 +- .../providers/vector_io/remote_chromadb.mdx | 2 +- .../providers/vector_io/remote_milvus.mdx | 6 +- .../providers/vector_io/remote_pgvector.mdx | 2 +- .../providers/vector_io/remote_qdrant.mdx | 6 +- .../providers/vector_io/remote_weaviate.mdx | 2 +- scripts/provider_codegen.py | 58 +++++++++++++++---- 65 files changed, 241 insertions(+), 197 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104af..200a3b9ca 100644 --- a/docs/docs/providers/agents/index.mdx 
+++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,8 @@ --- -description: "Agents +description: | + Agents - APIs for creating and interacting with agentic systems." + APIs for creating and interacting with agentic systems. sidebar_label: Agents title: Agents --- diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index fac9b8406..99a67feb4 100644 --- a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `AgentPersistenceConfig` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..18fd49945 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,15 @@ --- -description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +description: | + The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes. sidebar_label: Batches title: Batches --- diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index 45304fbb1..0a062c245 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,9 +14,9 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | Configuration for the key-value store backend. | -| `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. | -| `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. | +| `kvstore` | `KVStoreReference` | No | | Configuration for the key-value store backend. | +| `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. | +| `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. 
| ## Sample Configuration diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index a9363376c..4314696c5 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index de3ffaaa6..ede8ed631 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_nvidia.mdx b/docs/docs/providers/datasetio/remote_nvidia.mdx index 35a7dacee..97c48d810 100644 --- a/docs/docs/providers/datasetio/remote_nvidia.mdx +++ b/docs/docs/providers/datasetio/remote_nvidia.mdx @@ -17,7 +17,7 @@ NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform | `api_key` | `str \| None` | No | | The NVIDIA API key. | | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. | -| `datasets_url` | `` | No | http://nemo.test | Base URL for the NeMo Dataset API | +| `datasets_url` | `str` | No | http://nemo.test | Base URL for the NeMo Dataset API | ## Sample Configuration diff --git a/docs/docs/providers/eval/index.mdx b/docs/docs/providers/eval/index.mdx index 94bafe15e..3543db246 100644 --- a/docs/docs/providers/eval/index.mdx +++ b/docs/docs/providers/eval/index.mdx @@ -1,7 +1,8 @@ --- -description: "Evaluations +description: | + Evaluations - Llama Stack Evaluation API for running evaluations on model and agent candidates." + Llama Stack Evaluation API for running evaluations on model and agent candidates. sidebar_label: Eval title: Eval --- diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index 2c86c18c9..f1e923ee8 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/eval/remote_nvidia.mdx b/docs/docs/providers/eval/remote_nvidia.mdx index 36bb4726b..311496791 100644 --- a/docs/docs/providers/eval/remote_nvidia.mdx +++ b/docs/docs/providers/eval/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `evaluator_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | +| `evaluator_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | ## Sample Configuration diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx index 19e338035..0b28e9aee 100644 --- a/docs/docs/providers/files/index.mdx +++ b/docs/docs/providers/files/index.mdx @@ -1,7 +1,8 @@ --- -description: "Files +description: | + Files - This API is used to upload documents that can be used with other Llama Stack APIs." + This API is used to upload documents that can be used with other Llama Stack APIs. sidebar_label: Files title: Files --- diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx index bff0c4eb9..aa3a9232b 100644 --- a/docs/docs/providers/files/inline_localfs.mdx +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -14,9 +14,9 @@ Local filesystem-based file storage provider for managing files and documents lo | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `storage_dir` | `` | No | | Directory to store uploaded files | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | -| `ttl_secs` | `` | No | 31536000 | | +| `storage_dir` | `str` | No | | Directory to store uploaded files | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | +| `ttl_secs` | `int` | No | 31536000 | | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_openai.mdx b/docs/docs/providers/files/remote_openai.mdx index 3b5c40aad..48fe2fd57 100644 --- a/docs/docs/providers/files/remote_openai.mdx +++ b/docs/docs/providers/files/remote_openai.mdx @@ -14,8 +14,8 @@ OpenAI Files API provider for managing files through OpenAI's native file storag | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `api_key` | `` | No | | OpenAI API key for authentication | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | +| `api_key` | `str` | No | | OpenAI API key for authentication | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 65cd545c5..857ba1819 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -14,13 +14,13 @@ AWS S3-based file storage provider for scalable cloud file management with metad | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `bucket_name` | `` | No | | S3 bucket name to store files | -| `region` | `` | No | us-east-1 | AWS region where the bucket is located | +| `bucket_name` | `str` | No | | S3 bucket name to store files | +| `region` | `str` | No | us-east-1 | AWS region where the bucket is located | | `aws_access_key_id` | `str \| None` | No | | AWS access key ID (optional if using IAM roles) | | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) 
| -| `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | +| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 478611420..e2d94bfaf 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,12 +1,13 @@ --- -description: "Inference +description: | + Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Three kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents based on their relevance to a query." + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. sidebar_label: Inference title: Inference --- diff --git a/docs/docs/providers/inference/inline_meta-reference.mdx b/docs/docs/providers/inference/inline_meta-reference.mdx index 328586f9a..55b1606b0 100644 --- a/docs/docs/providers/inference/inline_meta-reference.mdx +++ b/docs/docs/providers/inference/inline_meta-reference.mdx @@ -16,12 +16,12 @@ Meta's reference implementation of inference with support for various model form |-------|------|----------|---------|-------------| | `model` | `str \| None` | No | | | | `torch_seed` | `int \| None` | No | | | -| `max_seq_len` | `` | No | 4096 | | -| `max_batch_size` | `` | No | 1 | | +| `max_seq_len` | `int` | No | 4096 | | +| `max_batch_size` | `int` | No | 1 | | | `model_parallel_size` | `int \| None` | No | | | -| `create_distributed_process_group` | `` | No | True | | +| `create_distributed_process_group` | `bool` | No | True | | | `checkpoint_dir` | `str \| None` | No | | | -| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig \| None` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx index 4acbbac50..14b431894 100644 --- a/docs/docs/providers/inference/remote_anthropic.mdx +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -14,9 +14,9 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index b3041259e..fd22b157e 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -21,10 +21,10 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `api_base` | `` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) | diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 61931643e..86bef3000 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -14,10 +14,10 @@ AWS Bedrock inference provider using OpenAI compatible endpoint. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `region_name` | `` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index cda0be224..1fb9530bb 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -14,10 +14,10 @@ Cerebras inference provider for running models on Cerebras Cloud platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index f14fd0175..7a926baf4 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -14,9 +14,9 @@ Databricks inference provider for running models on Databricks' unified analytic | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The Databricks API token | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The Databricks API token | | `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index 71f16ccec..7db74efc4 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -14,10 +14,10 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx index 22b3c8cb7..75e6b9692 100644 --- a/docs/docs/providers/inference/remote_gemini.mdx +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -14,9 +14,9 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index aaf1516ca..3ebd6f907 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -14,10 +14,10 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_endpoint.mdx b/docs/docs/providers/inference/remote_hf_endpoint.mdx index 771b24f8d..52b40c1f2 100644 --- a/docs/docs/providers/inference/remote_hf_endpoint.mdx +++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx @@ -14,8 +14,8 @@ HuggingFace Inference Endpoints provider for dedicated model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `endpoint_name` | `` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `endpoint_name` | `str` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_serverless.mdx b/docs/docs/providers/inference/remote_hf_serverless.mdx index 1a89b8e3e..52280df82 100644 --- a/docs/docs/providers/inference/remote_hf_serverless.mdx +++ b/docs/docs/providers/inference/remote_hf_serverless.mdx @@ -14,8 +14,8 @@ HuggingFace Inference API serverless provider for on-demand model inference. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `huggingface_repo` | `` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `huggingface_repo` | `str` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index 9769c0793..f67f40909 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -14,10 +14,10 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index 57c64ab46..6646d8b00 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -14,13 +14,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | -| `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | -| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | +| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | +| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. 
| ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_oci.mdx b/docs/docs/providers/inference/remote_oci.mdx index 33a201a55..d448755bf 100644 --- a/docs/docs/providers/inference/remote_oci.mdx +++ b/docs/docs/providers/inference/remote_oci.mdx @@ -21,14 +21,14 @@ https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `oci_auth_type` | `` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) | -| `oci_region` | `` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) | -| `oci_compartment_id` | `` | No | | OCI compartment ID for the Generative AI service | -| `oci_config_file_path` | `` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) | -| `oci_config_profile` | `` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `oci_auth_type` | `str` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) | +| `oci_region` | `str` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) | +| `oci_compartment_id` | `str` | No | | OCI compartment ID for the Generative AI service | +| `oci_config_file_path` | `str` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) | +| `oci_config_profile` | `str` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx index e00e34e4a..497bfed52 100644 --- a/docs/docs/providers/inference/remote_ollama.mdx +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -14,9 +14,9 @@ Ollama inference provider for running local models through the Ollama runtime. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | http://localhost:11434 | | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `url` | `str` | No | http://localhost:11434 | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index 28c8ab7bf..4931118fd 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -14,10 +14,10 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.openai.com/v1 | Base URL for OpenAI API | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 957cd04da..009961d49 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -14,10 +14,10 @@ Passthrough inference provider for connecting to any external inference service | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | | The URL for the passthrough endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | | The URL for the passthrough endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index 3cbbd0322..3b67e157d 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -14,9 +14,9 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index 0ac4600b7..6f4c5d7f6 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -14,10 +14,10 @@ SambaNova inference provider for running models on SambaNova's dataflow architec | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx index 67fe6d237..cd5ea7661 100644 --- a/docs/docs/providers/inference/remote_tgi.mdx +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -14,9 +14,9 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | | The URL for the TGI serving endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `url` | `str` | No | | The URL for the TGI serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index c8e3bcdcf..43192cc9e 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -14,10 +14,10 @@ Together AI inference provider for open-source models and collaborative AI devel | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vertexai.mdx b/docs/docs/providers/inference/remote_vertexai.mdx index c182ed485..59b574561 100644 --- a/docs/docs/providers/inference/remote_vertexai.mdx +++ b/docs/docs/providers/inference/remote_vertexai.mdx @@ -53,10 +53,10 @@ Available Models: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `project` | `` | No | | Google Cloud project ID for Vertex AI | -| `location` | `` | No | us-central1 | Google Cloud location for Vertex AI | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `project` | `str` | No | | Google Cloud project ID for Vertex AI | +| `location` | `str` | No | us-central1 | Google Cloud location for Vertex AI | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index f844bcee0..81620dbca 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -14,11 +14,11 @@ Remote vLLM inference provider for connecting to vLLM servers. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | -| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 2227aa1cc..3a1dba3b4 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -14,12 +14,12 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `project_id` | `str \| None` | No | | The watsonx.ai project ID | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx index ac7644de7..0d4241b27 100644 --- a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx +++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx @@ -14,23 +14,23 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `device` | `` | No | cuda | | -| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | -| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | -| `chat_template` | `` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | -| `model_specific_config` | `` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | -| `max_seq_length` | `` | No | 2048 | | -| `gradient_checkpointing` | `` | No | False | | -| `save_total_limit` | `` | No | 3 | | -| `logging_steps` | `` | No | 10 | | -| `warmup_ratio` | `` | No | 0.1 | | -| `weight_decay` | `` | No | 0.01 | | -| `dataloader_num_workers` | `` | No | 4 | | -| `dataloader_pin_memory` | `` | No | True | | -| `dpo_beta` | `` | No | 0.1 | | -| `use_reference_model` | `` | No | True | | -| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | | -| `dpo_output_dir` | `` | No | | | +| `device` | `str` | No | cuda | | +| `distributed_backend` | `Literal[fsdp, deepspeed] \| None` | No | | | +| `checkpoint_format` | `Literal[full_state, huggingface] \| None` | No | huggingface | | +| `chat_template` | `str` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | +| `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | +| `max_seq_length` | `int` | No | 2048 | | +| `gradient_checkpointing` | `bool` | No | False | | +| `save_total_limit` | `int` | No | 3 | | +| `logging_steps` | `int` | No | 10 | | +| `warmup_ratio` | `float` | No | 0.1 | | +| `weight_decay` | `float` | No | 0.01 | | +| `dataloader_num_workers` | `int` | No | 4 | | +| `dataloader_pin_memory` | `bool` | No | True | | +| `dpo_beta` | `float` | No | 0.1 | | +| `use_reference_model` | `bool` | No | True | | +| `dpo_loss_type` | `Literal[sigmoid, hinge, ipo, kto_pair]` | No | sigmoid | | +| `dpo_output_dir` | `str` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx index f789392fc..3e2c15d3e 100644 --- a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx index bd87797af..ac222d8a5 100644 --- a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/remote_nvidia.mdx b/docs/docs/providers/post_training/remote_nvidia.mdx index 448ac4c75..d0208f82f 100644 --- a/docs/docs/providers/post_training/remote_nvidia.mdx +++ b/docs/docs/providers/post_training/remote_nvidia.mdx @@ -18,9 +18,9 @@ NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform. | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. 
| | `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | -| `timeout` | `` | No | 300 | Timeout for the NVIDIA Post Training API | -| `max_retries` | `` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | -| `output_model_dir` | `` | No | test-example-model@v1 | Directory to save the output model | +| `timeout` | `int` | No | 300 | Timeout for the NVIDIA Post Training API | +| `max_retries` | `int` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | +| `output_model_dir` | `str` | No | test-example-model@v1 | Directory to save the output model | ## Sample Configuration diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 4e2de4f33..0c13de28c 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,8 @@ --- -description: "Safety +description: | + Safety - OpenAI-compatible Moderations API." + OpenAI-compatible Moderations API. sidebar_label: Safety title: Safety --- diff --git a/docs/docs/providers/safety/inline_llama-guard.mdx b/docs/docs/providers/safety/inline_llama-guard.mdx index 65866c9b2..d52e7289a 100644 --- a/docs/docs/providers/safety/inline_llama-guard.mdx +++ b/docs/docs/providers/safety/inline_llama-guard.mdx @@ -14,7 +14,7 @@ Llama Guard safety provider for content moderation and safety filtering using Me | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `excluded_categories` | `list[str` | No | [] | | +| `excluded_categories` | `list[str]` | No | [] | | ## Sample Configuration diff --git a/docs/docs/providers/safety/inline_prompt-guard.mdx b/docs/docs/providers/safety/inline_prompt-guard.mdx index c52e03e4b..dc57f8555 100644 --- a/docs/docs/providers/safety/inline_prompt-guard.mdx +++ b/docs/docs/providers/safety/inline_prompt-guard.mdx @@ -14,7 +14,7 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guard_type` | `` | No | injection | | +| `guard_type` | `str` | No | injection | | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index 663a761f0..990bd7246 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -14,8 +14,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. 
Default use environment variable: AWS_SESSION_TOKEN | diff --git a/docs/docs/providers/safety/remote_nvidia.mdx b/docs/docs/providers/safety/remote_nvidia.mdx index 0f665e60a..ac1fd0b03 100644 --- a/docs/docs/providers/safety/remote_nvidia.mdx +++ b/docs/docs/providers/safety/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guardrails_service_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | +| `guardrails_service_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | | `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_sambanova.mdx b/docs/docs/providers/safety/remote_sambanova.mdx index da70fce6c..69712879c 100644 --- a/docs/docs/providers/safety/remote_sambanova.mdx +++ b/docs/docs/providers/safety/remote_sambanova.mdx @@ -14,8 +14,8 @@ SambaNova's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | +| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `SecretStr \| None` | No | | The SambaNova cloud API Key | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_bing-search.mdx b/docs/docs/providers/tool_runtime/remote_bing-search.mdx index ec06bc20f..f97087d9e 100644 --- a/docs/docs/providers/tool_runtime/remote_bing-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_bing-search.mdx @@ -15,7 +15,7 @@ Bing Search tool for web search capabilities using Microsoft's search engine. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | | -| `top_k` | `` | No | 3 | | +| `top_k` | `int` | No | 3 | | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_brave-search.mdx b/docs/docs/providers/tool_runtime/remote_brave-search.mdx index 3aeed67d5..987ce0e41 100644 --- a/docs/docs/providers/tool_runtime/remote_brave-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_brave-search.mdx @@ -15,7 +15,7 @@ Brave Search tool for web search capabilities with privacy-focused results. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Brave Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx index fdca31bbe..36ad63646 100644 --- a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx @@ -15,7 +15,7 @@ Tavily Search tool for AI-optimized web search with structured results. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Tavily Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index 0be5cd5b3..d78a67b01 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -78,8 +78,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `db_path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 3a1fba055..c1eedf9db 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,7 +95,7 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index 17fd40cf5..9266b65b5 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 6063edab1..e8408a74f 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -16,9 +16,9 @@ Please refer to the remote provider documentation. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `db_path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index 057d96761..8f6155732 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -97,8 +97,8 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `path` | `` | No | | | -| `persistence` | `` | No | | | +| `path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 45631dff3..b63d9db72 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -407,8 +407,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 67cbd0021..a25ff1b28 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -16,8 +16,8 @@ Please refer to the sqlite-vec provider documentation. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index 2aee3eeca..970f4420f 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index bf9935d61..3e8ae71cf 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -405,10 +405,10 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `uri` | `` | No | | The URI of the Milvus server | +| `uri` | `str` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | -| `persistence` | `` | No | | Config for KV store backend | +| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index cb70f35d1..cd69e2b2f 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `db` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index dff9642b5..9b5117bcb 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -19,14 +19,14 @@ Please refer to the inline provider documentation. 
| `location` | `str \| None` | No | | | | `url` | `str \| None` | No | | | | `port` | `int \| None` | No | 6333 | | -| `grpc_port` | `` | No | 6334 | | -| `prefer_grpc` | `` | No | False | | +| `grpc_port` | `int` | No | 6334 | | +| `prefer_grpc` | `bool` | No | False | | | `https` | `bool \| None` | No | | | | `api_key` | `str \| None` | No | | | | `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx index b809bed2e..7a29d0d48 100644 --- a/docs/docs/providers/vector_io/remote_weaviate.mdx +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -75,7 +75,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more |-------|------|----------|---------|-------------| | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index d62d626ad..0eec46bc2 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -8,7 +8,8 @@ import subprocess import sys from pathlib import Path -from typing import Any +from types import UnionType +from typing import Annotated, Any, Union, get_args, get_origin from pydantic_core import PydanticUndefined from rich.progress import Progress, SpinnerColumn, TextColumn @@ -51,6 +52,41 @@ class ChangedPathTracker: return self._changed_paths +def extract_type_annotation(annotation: Any) -> str: + """extract a type annotation into a clean string representation.""" + if annotation is None: + return "Any" + + if annotation is type(None): + return "None" + + origin = get_origin(annotation) + args = get_args(annotation) + + # recursive workaround for Annotated types to ignore FieldInfo part + if origin is Annotated and args: + return extract_type_annotation(args[0]) + + if origin in [Union, UnionType]: + non_none_args = [arg for arg in args if arg is not type(None)] + has_none = len(non_none_args) < len(args) + + if len(non_none_args) == 1: + formatted = extract_type_annotation(non_none_args[0]) + return f"{formatted} | None" if has_none else formatted + else: + formatted_args = [extract_type_annotation(arg) for arg in non_none_args] + result = " | ".join(formatted_args) + return f"{result} | None" if has_none else result + + if origin is not None and args: + origin_name = getattr(origin, "__name__", str(origin)) + formatted_args = [extract_type_annotation(arg) for arg in args] + return f"{origin_name}[{', '.join(formatted_args)}]" + + return annotation.__name__ if hasattr(annotation, "__name__") else str(annotation) + + def get_config_class_info(config_class_path: str) -> dict[str, Any]: """Extract configuration information from a config class.""" try: @@ -78,14 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: for field_name, field in config_class.model_fields.items(): if getattr(field, "exclude", False): continue - field_type = str(field.annotation) if 
field.annotation else "Any" - # this string replace is ridiculous - field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") - field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") - field_type = field_type.replace("llama_stack_api.inference.", "") - field_type = field_type.replace("llama_stack.providers.", "") - field_type = field_type.replace("llama_stack_api.datatypes.", "") + field_type = extract_type_annotation(field.annotation) default_value = field.default if field.default_factory is not None: @@ -345,8 +375,16 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri # Add YAML frontmatter for index md_lines.append("---") if api_docstring: - clean_desc = api_docstring.strip().replace('"', '\\"') - md_lines.append(f'description: "{clean_desc}"') + # Handle multi-line descriptions in YAML + if "\n" in api_docstring.strip(): + md_lines.append("description: |") + for line in api_docstring.strip().split("\n"): + # Avoid trailing whitespace by only adding spaces to non-empty lines + md_lines.append(f" {line}" if line.strip() else "") + else: + # For single line descriptions, format properly for YAML + clean_desc = api_docstring.strip().replace('"', '\\"') + md_lines.append(f'description: "{clean_desc}"') md_lines.append(f"sidebar_label: {sidebar_label}") md_lines.append(f"title: {api_name.title()}") md_lines.append("---") From 7d3db6b22c901d7c0917465ad4a3bba1319523c2 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 17 Nov 2025 12:48:03 -0800 Subject: [PATCH 40/62] feat(openapi): generate stainless config "more" programmatically (#4164) Generate the Stainless client config directly from code so we can validate the config before we ever write the YAML. This change enforces allowed HTTP verbs/paths, detects duplicate routes across resources, and ensures README example endpoints exist and match the OpenAPI spec. The generator now fails fast when config entries drift, keeping the published config (hopefully) more current with the spec. I think more validation can be done but this is a good start. --- client-sdks/stainless/README.md | 5 +- client-sdks/stainless/config.yml | 337 ++++--- scripts/openapi_generator/__init__.py | 11 +- .../stainless_config/__init__.py | 7 + .../stainless_config/generate_config.py | 821 ++++++++++++++++++ scripts/run_openapi_generator.sh | 2 + 6 files changed, 990 insertions(+), 193 deletions(-) create mode 100644 scripts/openapi_generator/stainless_config/__init__.py create mode 100644 scripts/openapi_generator/stainless_config/generate_config.py diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md index 73e7082d4..54ff3d3d1 100644 --- a/client-sdks/stainless/README.md +++ b/client-sdks/stainless/README.md @@ -5,4 +5,7 @@ These are the source-of-truth configuration files used to generate the Stainless A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files. -These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `scripts/run_openapi_generator.sh` script. +These files go hand-in-hand. Both `openapi.yml` and `config.yml` are generated by `scripts/run_openapi_generator.sh`: + +- `openapi.yml` comes from the FastAPI-based generator. +- `config.yml` is rendered from `scripts/openapi_generator/stainless_config/config_data.py` so the Stainless config stays in lock-step with the spec. 
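For readers following along, the generate-and-validate flow this patch introduces can be exercised directly. Below is a minimal sketch based on the `StainlessConfig` helper and `write_config` function defined in `generate_config.py` (added later in this patch); it assumes the repository root is on `PYTHONPATH`, as `scripts/run_openapi_generator.sh` arranges:

```python
# Minimal sketch of the new Stainless config generation flow.
# StainlessConfig and write_config come from
# scripts/openapi_generator/stainless_config/generate_config.py (added below).
from pathlib import Path

from scripts.openapi_generator.stainless_config.generate_config import (
    StainlessConfig,
    write_config,
)

repo_root = Path(".").resolve()
spec_path = repo_root / "client-sdks" / "stainless" / "openapi.yml"

# Fails fast with a ValueError on duplicate routes defined across resources,
# or on README example endpoints missing from the resources/OpenAPI spec.
StainlessConfig.make().validate(spec_path)

# Renders client-sdks/stainless/config.yml (re-running the same validation)
# and returns the output path.
output = write_config(repo_root)
print(f"Wrote {output}")
```

End to end, this is what `python3 -m scripts.openapi_generator.stainless_config.generate_config` does when invoked from `run_openapi_generator.sh` (see the shell script change at the bottom of this patch).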
diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml index 9b26114fe..212b2b54a 100644 --- a/client-sdks/stainless/config.yml +++ b/client-sdks/stainless/config.yml @@ -1,20 +1,16 @@ # yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json organization: - # Name of your organization or company, used to determine the name of the client - # and headings. name: llama-stack-client docs: https://llama-stack.readthedocs.io/en/latest/ contact: llamastack@meta.com security: - - {} - - BearerAuth: [] +- {} +- BearerAuth: [] security_schemes: BearerAuth: type: http scheme: bearer -# `targets` define the output targets and their customization options, such as -# whether to emit the Node SDK and what it's package name should be. targets: node: package_name: llama-stack-client @@ -40,71 +36,123 @@ targets: options: enable_v2: true back_compat_use_shared_package: false - -# `client_settings` define settings for the API client, such as extra constructor -# arguments (used for authentication), retry behavior, idempotency, etc. client_settings: default_env_prefix: LLAMA_STACK_CLIENT opts: api_key: type: string read_env: LLAMA_STACK_CLIENT_API_KEY - auth: { security_scheme: BearerAuth } + auth: + security_scheme: BearerAuth nullable: true - -# `environments` are a map of the name of the environment (e.g. "sandbox", -# "production") to the corresponding url to use. environments: production: http://any-hosted-llama-stack.com - -# `pagination` defines [pagination schemes] which provides a template to match -# endpoints and generate next-page and auto-pagination helpers in the SDKs. pagination: - - name: datasets_iterrows - type: offset - request: - dataset_id: - type: string - start_index: - type: integer - x-stainless-pagination-property: - purpose: offset_count_param - limit: - type: integer - response: - data: - type: array - items: +- name: datasets_iterrows + type: offset + request: + dataset_id: + type: string + start_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_param + limit: + type: integer + response: + data: + type: array + items: + type: object + next_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_start_field +- name: openai_cursor_page + type: cursor + request: + limit: + type: integer + after: + type: string + x-stainless-pagination-property: + purpose: next_cursor_param + response: + data: + type: array + items: {} + has_more: + type: boolean + last_id: + type: string + x-stainless-pagination-property: + purpose: next_cursor_field +settings: + license: MIT + unwrap_response_fields: + - data + file_header: 'Copyright (c) Meta Platforms, Inc. and affiliates. + + All rights reserved. + + + This source code is licensed under the terms described in the LICENSE file in + + the root directory of this source tree. 
+ + ' +openapi: + transformations: + - command: mergeObject + reason: Better return_type using enum + args: + target: + - $.components.schemas + object: + ReturnType: + additionalProperties: false + properties: + type: + enum: + - string + - number + - boolean + - array + - object + - json + - union + - chat_completion_input + - completion_input + - agent_turn_input + required: + - type type: object - next_index: - type: integer - x-stainless-pagination-property: - purpose: offset_count_start_field - - name: openai_cursor_page - type: cursor - request: - limit: - type: integer - after: - type: string - x-stainless-pagination-property: - purpose: next_cursor_param - response: - data: - type: array - items: {} - has_more: - type: boolean - last_id: - type: string - x-stainless-pagination-property: - purpose: next_cursor_field -# `resources` define the structure and organziation for your API, such as how -# methods and models are grouped together and accessed. See the [configuration -# guide] for more information. -# -# [configuration guide]: -# https://app.stainlessapi.com/docs/guides/configure#resources + - command: replaceProperties + reason: Replace return type properties with better model (see above) + args: + filter: + only: + - $.components.schemas.ScoringFn.properties.return_type + - $.components.schemas.RegisterScoringFunctionRequest.properties.return_type + value: + $ref: '#/components/schemas/ReturnType' + - command: oneOfToAnyOf + reason: Prism (mock server) doesn't like one of our requests as it technically + matches multiple variants +readme: + example_requests: + default: + type: request + endpoint: post /v1/chat/completions + params: {} + headline: + type: request + endpoint: get /v1/models + params: {} + pagination: + type: request + endpoint: post /v1/chat/completions + params: {} resources: $shared: models: @@ -128,19 +176,17 @@ resources: methods: get: get /v1/tools/{tool_name} list: - endpoint: get /v1/tools paginated: false - + endpoint: get /v1/tools tool_runtime: models: tool_def: ToolDef tool_invocation_result: ToolInvocationResult methods: list_tools: - endpoint: get /v1/tool-runtime/list-tools paginated: false + endpoint: get /v1/tool-runtime/list-tools invoke_tool: post /v1/tool-runtime/invoke - responses: models: response_object_stream: OpenAIResponseObjectStream @@ -148,10 +194,10 @@ resources: methods: create: type: http - endpoint: post /v1/responses streaming: stream_event_model: responses.response_object_stream param_discriminator: stream + endpoint: post /v1/responses retrieve: get /v1/responses/{response_id} list: type: http @@ -164,9 +210,8 @@ resources: methods: list: type: http - endpoint: get /v1/responses/{response_id}/input_items paginated: false - + endpoint: get /v1/responses/{response_id}/input_items prompts: models: prompt: Prompt @@ -174,8 +219,8 @@ resources: methods: create: post /v1/prompts list: - endpoint: get /v1/prompts paginated: false + endpoint: get /v1/prompts retrieve: get /v1/prompts/{prompt_id} update: post /v1/prompts/{prompt_id} delete: delete /v1/prompts/{prompt_id} @@ -184,9 +229,8 @@ resources: versions: methods: list: - endpoint: get /v1/prompts/{prompt_id}/versions paginated: false - + endpoint: get /v1/prompts/{prompt_id}/versions conversations: models: conversation_object: Conversation @@ -216,7 +260,6 @@ resources: delete: type: http endpoint: delete /v1/conversations/{conversation_id}/items/{item_id} - inspect: models: healthInfo: HealthInfo @@ -226,13 +269,11 @@ resources: methods: health: get /v1/health version: get 
/v1/version - embeddings: models: create_embeddings_response: OpenAIEmbeddingsResponse methods: create: post /v1/embeddings - chat: models: chat_completion_chunk: OpenAIChatCompletionChunk @@ -241,14 +282,14 @@ resources: methods: create: type: http - endpoint: post /v1/chat/completions streaming: stream_event_model: chat.chat_completion_chunk param_discriminator: stream + endpoint: post /v1/chat/completions list: type: http - endpoint: get /v1/chat/completions paginated: false + endpoint: get /v1/chat/completions retrieve: type: http endpoint: get /v1/chat/completions/{completion_id} @@ -256,17 +297,15 @@ resources: methods: create: type: http - endpoint: post /v1/completions streaming: param_discriminator: stream - + endpoint: post /v1/completions vector_io: models: queryChunksResponse: QueryChunksResponse methods: insert: post /v1/vector-io/insert query: post /v1/vector-io/query - vector_stores: models: vector_store: VectorStoreObject @@ -275,8 +314,7 @@ resources: vector_store_search_response: VectorStoreSearchResponsePage methods: create: post /v1/vector_stores - list: - endpoint: get /v1/vector_stores + list: get /v1/vector_stores retrieve: get /v1/vector_stores/{vector_store_id} update: post /v1/vector_stores/{vector_store_id} delete: delete /v1/vector_stores/{vector_store_id} @@ -301,15 +339,14 @@ resources: retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id} list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel - models: models: model: OpenAIModel list_models_response: OpenAIListModelsResponse methods: list: - endpoint: get /v1/models paginated: false + endpoint: get /v1/models retrieve: get /v1/models/{model_id} register: post /v1/models unregister: delete /v1/models/{model_id} @@ -317,38 +354,33 @@ resources: openai: methods: list: - endpoint: get /v1/models paginated: false - + endpoint: get /v1/models providers: models: list_providers_response: ListProvidersResponse methods: list: - endpoint: get /v1/providers paginated: false + endpoint: get /v1/providers retrieve: get /v1/providers/{provider_id} - routes: models: list_routes_response: ListRoutesResponse methods: list: - endpoint: get /v1/inspect/routes paginated: false - + endpoint: get /v1/inspect/routes moderations: models: create_response: ModerationObject methods: create: post /v1/moderations - safety: models: run_shield_response: RunShieldResponse methods: run_shield: post /v1/safety/run-shield - shields: models: shield: Shield @@ -356,53 +388,48 @@ resources: methods: retrieve: get /v1/shields/{identifier} list: - endpoint: get /v1/shields paginated: false + endpoint: get /v1/shields register: post /v1/shields delete: delete /v1/shields/{identifier} - scoring: methods: score: post /v1/scoring/score score_batch: post /v1/scoring/score-batch scoring_functions: - methods: - retrieve: get /v1/scoring-functions/{scoring_fn_id} - list: - endpoint: get /v1/scoring-functions - paginated: false - register: post /v1/scoring-functions - unregister: delete /v1/scoring-functions/{scoring_fn_id} models: scoring_fn: ScoringFn scoring_fn_params: ScoringFnParams list_scoring_functions_response: ListScoringFunctionsResponse - + methods: + retrieve: get /v1/scoring-functions/{scoring_fn_id} + list: + paginated: false + endpoint: get /v1/scoring-functions + register: post /v1/scoring-functions + unregister: delete /v1/scoring-functions/{scoring_fn_id} files: + models: + file: OpenAIFileObject + 
list_files_response: ListOpenAIFileResponse + delete_file_response: OpenAIFileDeleteResponse methods: create: post /v1/files list: get /v1/files retrieve: get /v1/files/{file_id} delete: delete /v1/files/{file_id} content: get /v1/files/{file_id}/content - models: - file: OpenAIFileObject - list_files_response: ListOpenAIFileResponse - delete_file_response: OpenAIFileDeleteResponse - batches: methods: create: post /v1/batches list: get /v1/batches retrieve: get /v1/batches/{batch_id} cancel: post /v1/batches/{batch_id}/cancel - alpha: subresources: inference: methods: rerank: post /v1alpha/inference/rerank - post_training: models: algorithm_config: AlgorithmConfig @@ -418,39 +445,35 @@ resources: cancel: post /v1alpha/post-training/job/cancel status: get /v1alpha/post-training/job/status list: - endpoint: get /v1alpha/post-training/jobs paginated: false - + endpoint: get /v1alpha/post-training/jobs benchmarks: - methods: - retrieve: get /v1alpha/eval/benchmarks/{benchmark_id} - list: - endpoint: get /v1alpha/eval/benchmarks - paginated: false - register: post /v1alpha/eval/benchmarks - unregister: delete /v1alpha/eval/benchmarks/{benchmark_id} models: benchmark: Benchmark list_benchmarks_response: ListBenchmarksResponse - + methods: + retrieve: get /v1alpha/eval/benchmarks/{benchmark_id} + list: + paginated: false + endpoint: get /v1alpha/eval/benchmarks + register: post /v1alpha/eval/benchmarks + unregister: delete /v1alpha/eval/benchmarks/{benchmark_id} eval: + models: + evaluate_response: EvaluateResponse + benchmark_config: BenchmarkConfig + job: Job methods: evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs - subresources: jobs: methods: cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result - models: - evaluate_response: EvaluateResponse - benchmark_config: BenchmarkConfig - job: Job - beta: subresources: datasets: @@ -460,74 +483,8 @@ resources: register: post /v1beta/datasets retrieve: get /v1beta/datasets/{dataset_id} list: - endpoint: get /v1beta/datasets paginated: false + endpoint: get /v1beta/datasets unregister: delete /v1beta/datasets/{dataset_id} iterrows: get /v1beta/datasetio/iterrows/{dataset_id} appendrows: post /v1beta/datasetio/append-rows/{dataset_id} - -settings: - license: MIT - unwrap_response_fields: [data] - file_header: | - Copyright (c) Meta Platforms, Inc. and affiliates. - All rights reserved. - - This source code is licensed under the terms described in the LICENSE file in - the root directory of this source tree. 
- -openapi: - transformations: - - command: mergeObject - reason: Better return_type using enum - args: - target: - - "$.components.schemas" - object: - ReturnType: - additionalProperties: false - properties: - type: - enum: - - string - - number - - boolean - - array - - object - - json - - union - - chat_completion_input - - completion_input - - agent_turn_input - required: - - type - type: object - - command: replaceProperties - reason: Replace return type properties with better model (see above) - args: - filter: - only: - - "$.components.schemas.ScoringFn.properties.return_type" - - "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type" - value: - $ref: "#/components/schemas/ReturnType" - - command: oneOfToAnyOf - reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants - -# `readme` is used to configure the code snippets that will be rendered in the -# README.md of various SDKs. In particular, you can change the `headline` -# snippet's endpoint and the arguments to call it with. -readme: - example_requests: - default: - type: request - endpoint: post /v1/chat/completions - params: &ref_0 {} - headline: - type: request - endpoint: get /v1/models - params: *ref_0 - pagination: - type: request - endpoint: post /v1/chat/completions - params: {} diff --git a/scripts/openapi_generator/__init__.py b/scripts/openapi_generator/__init__.py index 7f6aaa1d1..834836f76 100644 --- a/scripts/openapi_generator/__init__.py +++ b/scripts/openapi_generator/__init__.py @@ -11,6 +11,13 @@ This module provides functionality to generate OpenAPI specifications from FastAPI applications. """ -from .main import generate_openapi_spec, main - __all__ = ["generate_openapi_spec", "main"] + + +def __getattr__(name: str): + if name in {"generate_openapi_spec", "main"}: + from .main import generate_openapi_spec as _gos + from .main import main as _main + + return {"generate_openapi_spec": _gos, "main": _main}[name] + raise AttributeError(name) diff --git a/scripts/openapi_generator/stainless_config/__init__.py b/scripts/openapi_generator/stainless_config/__init__.py new file mode 100644 index 000000000..bf44f82ba --- /dev/null +++ b/scripts/openapi_generator/stainless_config/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Package marker for Stainless config generation. diff --git a/scripts/openapi_generator/stainless_config/generate_config.py b/scripts/openapi_generator/stainless_config/generate_config.py new file mode 100644 index 000000000..dabc2119f --- /dev/null +++ b/scripts/openapi_generator/stainless_config/generate_config.py @@ -0,0 +1,821 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from __future__ import annotations + +from collections.abc import Iterator +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + +HEADER = "# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json\n\n" + +SECTION_ORDER = [ + "organization", + "security", + "security_schemes", + "targets", + "client_settings", + "environments", + "pagination", + "settings", + "openapi", + "readme", + "resources", +] + +ORGANIZATION = { + "name": "llama-stack-client", + "docs": "https://llama-stack.readthedocs.io/en/latest/", + "contact": "llamastack@meta.com", +} + +SECURITY = [{}, {"BearerAuth": []}] + +SECURITY_SCHEMES = {"BearerAuth": {"type": "http", "scheme": "bearer"}} + +TARGETS = { + "node": { + "package_name": "llama-stack-client", + "production_repo": "llamastack/llama-stack-client-typescript", + "publish": {"npm": False}, + }, + "python": { + "package_name": "llama_stack_client", + "production_repo": "llamastack/llama-stack-client-python", + "options": {"use_uv": True}, + "publish": {"pypi": True}, + "project_name": "llama_stack_client", + }, + "kotlin": { + "reverse_domain": "com.llama_stack_client.api", + "production_repo": None, + "publish": {"maven": False}, + }, + "go": { + "package_name": "llama-stack-client", + "production_repo": "llamastack/llama-stack-client-go", + "options": {"enable_v2": True, "back_compat_use_shared_package": False}, + }, +} + +CLIENT_SETTINGS = { + "default_env_prefix": "LLAMA_STACK_CLIENT", + "opts": { + "api_key": { + "type": "string", + "read_env": "LLAMA_STACK_CLIENT_API_KEY", + "auth": {"security_scheme": "BearerAuth"}, + "nullable": True, + } + }, +} + +ENVIRONMENTS = {"production": "http://any-hosted-llama-stack.com"} + +PAGINATION = [ + { + "name": "datasets_iterrows", + "type": "offset", + "request": { + "dataset_id": {"type": "string"}, + "start_index": { + "type": "integer", + "x-stainless-pagination-property": {"purpose": "offset_count_param"}, + }, + "limit": {"type": "integer"}, + }, + "response": { + "data": {"type": "array", "items": {"type": "object"}}, + "next_index": { + "type": "integer", + "x-stainless-pagination-property": {"purpose": "offset_count_start_field"}, + }, + }, + }, + { + "name": "openai_cursor_page", + "type": "cursor", + "request": { + "limit": {"type": "integer"}, + "after": { + "type": "string", + "x-stainless-pagination-property": {"purpose": "next_cursor_param"}, + }, + }, + "response": { + "data": {"type": "array", "items": {}}, + "has_more": {"type": "boolean"}, + "last_id": { + "type": "string", + "x-stainless-pagination-property": {"purpose": "next_cursor_field"}, + }, + }, + }, +] + +SETTINGS = { + "license": "MIT", + "unwrap_response_fields": ["data"], + "file_header": "Copyright (c) Meta Platforms, Inc. 
and affiliates.\n" + "All rights reserved.\n" + "\n" + "This source code is licensed under the terms described in the " + "LICENSE file in\n" + "the root directory of this source tree.\n", +} + +OPENAPI = { + "transformations": [ + { + "command": "mergeObject", + "reason": "Better return_type using enum", + "args": { + "target": ["$.components.schemas"], + "object": { + "ReturnType": { + "additionalProperties": False, + "properties": { + "type": { + "enum": [ + "string", + "number", + "boolean", + "array", + "object", + "json", + "union", + "chat_completion_input", + "completion_input", + "agent_turn_input", + ] + } + }, + "required": ["type"], + "type": "object", + } + }, + }, + }, + { + "command": "replaceProperties", + "reason": "Replace return type properties with better model (see above)", + "args": { + "filter": { + "only": [ + "$.components.schemas.ScoringFn.properties.return_type", + "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type", + ] + }, + "value": {"$ref": "#/components/schemas/ReturnType"}, + }, + }, + { + "command": "oneOfToAnyOf", + "reason": "Prism (mock server) doesn't like one of our " + "requests as it technically matches multiple " + "variants", + }, + ] +} + +README = { + "example_requests": { + "default": { + "type": "request", + "endpoint": "post /v1/chat/completions", + "params": {}, + }, + "headline": {"type": "request", "endpoint": "get /v1/models", "params": {}}, + "pagination": { + "type": "request", + "endpoint": "post /v1/chat/completions", + "params": {}, + }, + } +} + +ALL_RESOURCES = { + "$shared": { + "models": { + "interleaved_content_item": "InterleavedContentItem", + "interleaved_content": "InterleavedContent", + "param_type": "ParamType", + "safety_violation": "SafetyViolation", + "sampling_params": "SamplingParams", + "scoring_result": "ScoringResult", + "system_message": "SystemMessage", + } + }, + "toolgroups": { + "models": { + "tool_group": "ToolGroup", + "list_tool_groups_response": "ListToolGroupsResponse", + }, + "methods": { + "register": "post /v1/toolgroups", + "get": "get /v1/toolgroups/{toolgroup_id}", + "list": "get /v1/toolgroups", + "unregister": "delete /v1/toolgroups/{toolgroup_id}", + }, + }, + "tools": { + "methods": { + "get": "get /v1/tools/{tool_name}", + "list": {"paginated": False, "endpoint": "get /v1/tools"}, + } + }, + "tool_runtime": { + "models": { + "tool_def": "ToolDef", + "tool_invocation_result": "ToolInvocationResult", + }, + "methods": { + "list_tools": { + "paginated": False, + "endpoint": "get /v1/tool-runtime/list-tools", + }, + "invoke_tool": "post /v1/tool-runtime/invoke", + }, + }, + "responses": { + "models": { + "response_object_stream": "OpenAIResponseObjectStream", + "response_object": "OpenAIResponseObject", + }, + "methods": { + "create": { + "type": "http", + "streaming": { + "stream_event_model": "responses.response_object_stream", + "param_discriminator": "stream", + }, + "endpoint": "post /v1/responses", + }, + "retrieve": "get /v1/responses/{response_id}", + "list": {"type": "http", "endpoint": "get /v1/responses"}, + "delete": { + "type": "http", + "endpoint": "delete /v1/responses/{response_id}", + }, + }, + "subresources": { + "input_items": { + "methods": { + "list": { + "type": "http", + "paginated": False, + "endpoint": "get /v1/responses/{response_id}/input_items", + } + } + } + }, + }, + "prompts": { + "models": {"prompt": "Prompt", "list_prompts_response": "ListPromptsResponse"}, + "methods": { + "create": "post /v1/prompts", + "list": {"paginated": False, 
"endpoint": "get /v1/prompts"}, + "retrieve": "get /v1/prompts/{prompt_id}", + "update": "post /v1/prompts/{prompt_id}", + "delete": "delete /v1/prompts/{prompt_id}", + "set_default_version": "post /v1/prompts/{prompt_id}/set-default-version", + }, + "subresources": { + "versions": { + "methods": { + "list": { + "paginated": False, + "endpoint": "get /v1/prompts/{prompt_id}/versions", + } + } + } + }, + }, + "conversations": { + "models": {"conversation_object": "Conversation"}, + "methods": { + "create": {"type": "http", "endpoint": "post /v1/conversations"}, + "retrieve": "get /v1/conversations/{conversation_id}", + "update": { + "type": "http", + "endpoint": "post /v1/conversations/{conversation_id}", + }, + "delete": { + "type": "http", + "endpoint": "delete /v1/conversations/{conversation_id}", + }, + }, + "subresources": { + "items": { + "methods": { + "get": { + "type": "http", + "endpoint": "get /v1/conversations/{conversation_id}/items/{item_id}", + }, + "list": { + "type": "http", + "endpoint": "get /v1/conversations/{conversation_id}/items", + }, + "create": { + "type": "http", + "endpoint": "post /v1/conversations/{conversation_id}/items", + }, + "delete": { + "type": "http", + "endpoint": "delete /v1/conversations/{conversation_id}/items/{item_id}", + }, + } + } + }, + }, + "inspect": { + "models": { + "healthInfo": "HealthInfo", + "providerInfo": "ProviderInfo", + "routeInfo": "RouteInfo", + "versionInfo": "VersionInfo", + }, + "methods": {"health": "get /v1/health", "version": "get /v1/version"}, + }, + "embeddings": { + "models": {"create_embeddings_response": "OpenAIEmbeddingsResponse"}, + "methods": {"create": "post /v1/embeddings"}, + }, + "chat": { + "models": {"chat_completion_chunk": "OpenAIChatCompletionChunk"}, + "subresources": { + "completions": { + "methods": { + "create": { + "type": "http", + "streaming": { + "stream_event_model": "chat.chat_completion_chunk", + "param_discriminator": "stream", + }, + "endpoint": "post /v1/chat/completions", + }, + "list": { + "type": "http", + "paginated": False, + "endpoint": "get /v1/chat/completions", + }, + "retrieve": { + "type": "http", + "endpoint": "get /v1/chat/completions/{completion_id}", + }, + } + } + }, + }, + "completions": { + "methods": { + "create": { + "type": "http", + "streaming": {"param_discriminator": "stream"}, + "endpoint": "post /v1/completions", + } + } + }, + "vector_io": { + "models": {"queryChunksResponse": "QueryChunksResponse"}, + "methods": { + "insert": "post /v1/vector-io/insert", + "query": "post /v1/vector-io/query", + }, + }, + "vector_stores": { + "models": { + "vector_store": "VectorStoreObject", + "list_vector_stores_response": "VectorStoreListResponse", + "vector_store_delete_response": "VectorStoreDeleteResponse", + "vector_store_search_response": "VectorStoreSearchResponsePage", + }, + "methods": { + "create": "post /v1/vector_stores", + "list": "get /v1/vector_stores", + "retrieve": "get /v1/vector_stores/{vector_store_id}", + "update": "post /v1/vector_stores/{vector_store_id}", + "delete": "delete /v1/vector_stores/{vector_store_id}", + "search": "post /v1/vector_stores/{vector_store_id}/search", + }, + "subresources": { + "files": { + "models": {"vector_store_file": "VectorStoreFileObject"}, + "methods": { + "list": "get /v1/vector_stores/{vector_store_id}/files", + "retrieve": "get /v1/vector_stores/{vector_store_id}/files/{file_id}", + "update": "post /v1/vector_stores/{vector_store_id}/files/{file_id}", + "delete": "delete /v1/vector_stores/{vector_store_id}/files/{file_id}", 
+ "create": "post /v1/vector_stores/{vector_store_id}/files", + "content": "get /v1/vector_stores/{vector_store_id}/files/{file_id}/content", + }, + }, + "file_batches": { + "models": { + "vector_store_file_batches": "VectorStoreFileBatchObject", + "list_vector_store_files_in_batch_response": "VectorStoreFilesListInBatchResponse", + }, + "methods": { + "create": "post /v1/vector_stores/{vector_store_id}/file_batches", + "retrieve": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}", + "list_files": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + "cancel": "post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + }, + }, + }, + }, + "models": { + "models": { + "model": "OpenAIModel", + "list_models_response": "OpenAIListModelsResponse", + }, + "methods": { + "list": {"paginated": False, "endpoint": "get /v1/models"}, + "retrieve": "get /v1/models/{model_id}", + "register": "post /v1/models", + "unregister": "delete /v1/models/{model_id}", + }, + "subresources": {"openai": {"methods": {"list": {"paginated": False, "endpoint": "get /v1/models"}}}}, + }, + "providers": { + "models": {"list_providers_response": "ListProvidersResponse"}, + "methods": { + "list": {"paginated": False, "endpoint": "get /v1/providers"}, + "retrieve": "get /v1/providers/{provider_id}", + }, + }, + "routes": { + "models": {"list_routes_response": "ListRoutesResponse"}, + "methods": {"list": {"paginated": False, "endpoint": "get /v1/inspect/routes"}}, + }, + "moderations": { + "models": {"create_response": "ModerationObject"}, + "methods": {"create": "post /v1/moderations"}, + }, + "safety": { + "models": {"run_shield_response": "RunShieldResponse"}, + "methods": {"run_shield": "post /v1/safety/run-shield"}, + }, + "shields": { + "models": {"shield": "Shield", "list_shields_response": "ListShieldsResponse"}, + "methods": { + "retrieve": "get /v1/shields/{identifier}", + "list": {"paginated": False, "endpoint": "get /v1/shields"}, + "register": "post /v1/shields", + "delete": "delete /v1/shields/{identifier}", + }, + }, + "scoring": { + "methods": { + "score": "post /v1/scoring/score", + "score_batch": "post /v1/scoring/score-batch", + } + }, + "scoring_functions": { + "models": { + "scoring_fn": "ScoringFn", + "scoring_fn_params": "ScoringFnParams", + "list_scoring_functions_response": "ListScoringFunctionsResponse", + }, + "methods": { + "retrieve": "get /v1/scoring-functions/{scoring_fn_id}", + "list": {"paginated": False, "endpoint": "get /v1/scoring-functions"}, + "register": "post /v1/scoring-functions", + "unregister": "delete /v1/scoring-functions/{scoring_fn_id}", + }, + }, + "files": { + "models": { + "file": "OpenAIFileObject", + "list_files_response": "ListOpenAIFileResponse", + "delete_file_response": "OpenAIFileDeleteResponse", + }, + "methods": { + "create": "post /v1/files", + "list": "get /v1/files", + "retrieve": "get /v1/files/{file_id}", + "delete": "delete /v1/files/{file_id}", + "content": "get /v1/files/{file_id}/content", + }, + }, + "batches": { + "methods": { + "create": "post /v1/batches", + "list": "get /v1/batches", + "retrieve": "get /v1/batches/{batch_id}", + "cancel": "post /v1/batches/{batch_id}/cancel", + } + }, + "alpha": { + "subresources": { + "inference": {"methods": {"rerank": "post /v1alpha/inference/rerank"}}, + "post_training": { + "models": { + "algorithm_config": "AlgorithmConfig", + "post_training_job": "PostTrainingJob", + "list_post_training_jobs_response": "ListPostTrainingJobsResponse", + }, + "methods": { + 
"preference_optimize": "post /v1alpha/post-training/preference-optimize", + "supervised_fine_tune": "post /v1alpha/post-training/supervised-fine-tune", + }, + "subresources": { + "job": { + "methods": { + "artifacts": "get /v1alpha/post-training/job/artifacts", + "cancel": "post /v1alpha/post-training/job/cancel", + "status": "get /v1alpha/post-training/job/status", + "list": { + "paginated": False, + "endpoint": "get /v1alpha/post-training/jobs", + }, + } + } + }, + }, + "benchmarks": { + "models": { + "benchmark": "Benchmark", + "list_benchmarks_response": "ListBenchmarksResponse", + }, + "methods": { + "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}", + "list": { + "paginated": False, + "endpoint": "get /v1alpha/eval/benchmarks", + }, + "register": "post /v1alpha/eval/benchmarks", + "unregister": "delete /v1alpha/eval/benchmarks/{benchmark_id}", + }, + }, + "eval": { + "models": { + "evaluate_response": "EvaluateResponse", + "benchmark_config": "BenchmarkConfig", + "job": "Job", + }, + "methods": { + "evaluate_rows": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "run_eval": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs", + "evaluate_rows_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "run_eval_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs", + }, + "subresources": { + "jobs": { + "methods": { + "cancel": "delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "status": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + } + } + }, + }, + } + }, + "beta": { + "subresources": { + "datasets": { + "models": {"list_datasets_response": "ListDatasetsResponse"}, + "methods": { + "register": "post /v1beta/datasets", + "retrieve": "get /v1beta/datasets/{dataset_id}", + "list": {"paginated": False, "endpoint": "get /v1beta/datasets"}, + "unregister": "delete /v1beta/datasets/{dataset_id}", + "iterrows": "get /v1beta/datasetio/iterrows/{dataset_id}", + "appendrows": "post /v1beta/datasetio/append-rows/{dataset_id}", + }, + } + } + }, +} + + +HTTP_METHODS = {"get", "post", "put", "patch", "delete", "options", "head"} + + +@dataclass +class Endpoint: + method: str + path: str + extra: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_config(cls, value: Any) -> Endpoint: + if isinstance(value, str): + method, _, path = value.partition(" ") + return cls._from_parts(method, path) + if isinstance(value, dict) and "endpoint" in value: + method, _, path = value["endpoint"].partition(" ") + extra = {k: v for k, v in value.items() if k != "endpoint"} + endpoint = cls._from_parts(method, path) + endpoint.extra.update(extra) + return endpoint + raise ValueError(f"Unsupported endpoint value: {value!r}") + + @classmethod + def _from_parts(cls, method: str, path: str) -> Endpoint: + method = method.strip().lower() + path = path.strip() + if method not in HTTP_METHODS: + raise ValueError(f"Unsupported HTTP method for Stainless config: {method!r}") + if not path.startswith("/"): + raise ValueError(f"Endpoint path must start with '/': {path!r}") + return cls(method=method, path=path) + + def to_config(self) -> Any: + if not self.extra: + return f"{self.method} {self.path}" + data = dict(self.extra) + data["endpoint"] = f"{self.method} {self.path}" + return data + + def route_key(self) -> str: + return f"{self.method} {self.path}" + + +@dataclass +class Resource: + models: dict[str, str] | None = None + methods: dict[str, 
Endpoint] = field(default_factory=dict) + subresources: dict[str, Resource] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> Resource: + models = data.get("models") + methods = {name: Endpoint.from_config(value) for name, value in data.get("methods", {}).items()} + subresources = {name: cls.from_dict(value) for name, value in data.get("subresources", {}).items()} + return cls(models=models, methods=methods, subresources=subresources) + + def to_config(self) -> dict[str, Any]: + result: dict[str, Any] = {} + if self.models: + result["models"] = self.models + if self.methods: + result["methods"] = {name: endpoint.to_config() for name, endpoint in self.methods.items()} + if self.subresources: + result["subresources"] = {name: resource.to_config() for name, resource in self.subresources.items()} + return result + + def collect_endpoint_paths(self) -> set[str]: + paths = {endpoint.route_key() for endpoint in self.methods.values()} + for subresource in self.subresources.values(): + paths.update(subresource.collect_endpoint_paths()) + return paths + + def iter_endpoints(self, prefix: str) -> Iterator[tuple[str, str]]: + for method_name, endpoint in self.methods.items(): + label = f"{prefix}.{method_name}" if prefix else method_name + yield endpoint.route_key(), label + for sub_name, subresource in self.subresources.items(): + sub_prefix = f"{prefix}.{sub_name}" if prefix else sub_name + yield from subresource.iter_endpoints(sub_prefix) + + +_RESOURCES = {name: Resource.from_dict(data) for name, data in ALL_RESOURCES.items()} + + +def _load_openapi_paths(openapi_path: Path) -> set[str]: + spec = yaml.safe_load(openapi_path.read_text()) or {} + paths: set[str] = set() + for path, path_item in (spec.get("paths") or {}).items(): + if not isinstance(path_item, dict): + continue + for method, operation in path_item.items(): + if not isinstance(operation, dict): + continue + paths.add(f"{str(method).lower()} {path}") + return paths + + +@dataclass(frozen=True) +class StainlessConfig: + organization: dict[str, Any] + security: list[Any] + security_schemes: dict[str, Any] + targets: dict[str, Any] + client_settings: dict[str, Any] + environments: dict[str, Any] + pagination: list[dict[str, Any]] + settings: dict[str, Any] + openapi: dict[str, Any] + readme: dict[str, Any] + resources: dict[str, Resource] + + @classmethod + def make(cls) -> StainlessConfig: + return cls( + organization=ORGANIZATION, + security=SECURITY, + security_schemes=SECURITY_SCHEMES, + targets=TARGETS, + client_settings=CLIENT_SETTINGS, + environments=ENVIRONMENTS, + pagination=PAGINATION, + settings=SETTINGS, + openapi=OPENAPI, + readme=README, + resources=dict(_RESOURCES), + ) + + def referenced_paths(self) -> set[str]: + paths: set[str] = set() + for resource in self.resources.values(): + paths.update(resource.collect_endpoint_paths()) + paths.update(self.readme_endpoint_paths()) + return paths + + def readme_endpoint_paths(self) -> set[str]: + example_requests = self.readme.get("example_requests", {}) if self.readme else {} + paths: set[str] = set() + for entry in example_requests.values(): + endpoint = entry.get("endpoint") if isinstance(entry, dict) else None + if isinstance(endpoint, str): + method, _, route = endpoint.partition(" ") + method = method.strip().lower() + route = route.strip() + if method and route: + paths.add(f"{method} {route}") + return paths + + def endpoint_map(self) -> dict[str, list[str]]: + mapping: dict[str, list[str]] = {} + for resource_name, resource in 
self.resources.items(): + for route, label in resource.iter_endpoints(resource_name): + mapping.setdefault(route, []).append(label) + return mapping + + def validate_unique_endpoints(self) -> None: + duplicates: dict[str, list[str]] = {} + for route, labels in self.endpoint_map().items(): + top_levels = {label.split(".", 1)[0] for label in labels} + if len(top_levels) > 1: + duplicates[route] = labels + if duplicates: + formatted = "\n".join( + f" - {route} defined in: {', '.join(sorted(labels))}" for route, labels in sorted(duplicates.items()) + ) + raise ValueError("Duplicate endpoints found across resources:\n" + formatted) + + def validate_readme_endpoints(self) -> None: + resource_paths: set[str] = set() + for resource in self.resources.values(): + resource_paths.update(resource.collect_endpoint_paths()) + missing = sorted(path for path in self.readme_endpoint_paths() if path not in resource_paths) + if missing: + formatted = "\n".join(f" - {path}" for path in missing) + raise ValueError("README example endpoints are not present in Stainless resources:\n" + formatted) + + def to_dict(self) -> dict[str, Any]: + cfg: dict[str, Any] = {} + for section in SECTION_ORDER: + if section == "resources": + cfg[section] = {name: resource.to_config() for name, resource in self.resources.items()} + continue + cfg[section] = getattr(self, section) + return cfg + + def validate_against_openapi(self, openapi_path: Path) -> None: + if not openapi_path.exists(): + raise FileNotFoundError(f"OpenAPI spec not found at {openapi_path}") + spec_paths = _load_openapi_paths(openapi_path) + config_paths = self.referenced_paths() + missing = sorted(path for path in config_paths if path not in spec_paths) + if missing: + formatted = "\n".join(f" - {path}" for path in missing) + raise ValueError("Stainless config references missing endpoints:\n" + formatted) + + def validate(self, openapi_path: Path | None = None) -> None: + self.validate_unique_endpoints() + self.validate_readme_endpoints() + if openapi_path is not None: + self.validate_against_openapi(openapi_path) + + +def build_config() -> dict[str, Any]: + return StainlessConfig.make().to_dict() + + +def write_config(repo_root: Path, openapi_path: Path | None = None) -> Path: + stainless_config = StainlessConfig.make() + spec_path = (openapi_path or (repo_root / "client-sdks" / "stainless" / "openapi.yml")).resolve() + stainless_config.validate(spec_path) + yaml_text = yaml.safe_dump(stainless_config.to_dict(), sort_keys=False) + output = repo_root / "client-sdks" / "stainless" / "config.yml" + output.write_text(HEADER + yaml_text) + return output + + +def main() -> None: + repo_root = Path(__file__).resolve().parents[3] + output = write_config(repo_root) + print(f"Wrote Stainless config: {output}") + + +if __name__ == "__main__": + main() diff --git a/scripts/run_openapi_generator.sh b/scripts/run_openapi_generator.sh index 946b2886f..d4e3b2ec7 100755 --- a/scripts/run_openapi_generator.sh +++ b/scripts/run_openapi_generator.sh @@ -17,3 +17,5 @@ PYTHONPATH=$PYTHONPATH:$stack_dir \ python3 -m scripts.openapi_generator "$stack_dir"/docs/static cp "$stack_dir"/docs/static/stainless-llama-stack-spec.yaml "$stack_dir"/client-sdks/stainless/openapi.yml +PYTHONPATH=$PYTHONPATH:$stack_dir \ + python3 -m scripts.openapi_generator.stainless_config.generate_config From 29f1fa6abd4b490d6855c2ae18fcdbb10494323e Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 17 Nov 2025 16:23:43 -0500 Subject: [PATCH 41/62] test(api): pre-commit check to ensure API does not 
import llama_stack (#4160) # What does this PR do? since llama_stack_api is meant to be _just_ the API definitions of LLS, we should have pre-commit check that prohibits anyone from accidentally importing `from llama_stack` or adding `llama_stack` as a dependency into `llama_stack_api`s pyproject. ## Test Plan pre-commit should pass. Signed-off-by: Charlie Doern --- .pre-commit-config.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c31a39406..f94356fe5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -199,6 +199,27 @@ repos: echo; exit 1; } || true + - id: check-api-independence + name: Ensure llama_stack_api does not import llama_stack + entry: bash + language: system + pass_filenames: false + require_serial: true + always_run: true + files: ^src/llama_stack_api/.*$ + args: + - -c + - | + API_DIR="src/llama_stack_api" + grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' "$API_DIR" 2>/dev/null && { + echo "llama_stack_api must not import llama_stack"; + exit 1; + } + [ -f "$API_DIR/pyproject.toml" ] && grep -n 'llama_stack[^_]' "$API_DIR/pyproject.toml" && { + echo "llama_stack_api must not depend on llama_stack in pyproject.toml"; + exit 1; + } + exit 0 ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks From 7093978754620c31b689f9b400ac84f60d1b5b83 Mon Sep 17 00:00:00 2001 From: raghotham Date: Tue, 18 Nov 2025 04:47:04 +0530 Subject: [PATCH 42/62] chore(docs): Remove Llama 4 support details from README (#4178) --- README.md | 77 ------------------------------------------------------- 1 file changed, 77 deletions(-) diff --git a/README.md b/README.md index 639e7280d..5360f4ff0 100644 --- a/README.md +++ b/README.md @@ -10,83 +10,6 @@ [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack) -### ✨🎉 Llama 4 Support 🎉✨ -We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta. - -
- -👋 Click here to see how to run Llama 4 models on Llama Stack - -\ -*Note you need 8xH100 GPU-host to run these models* - -```bash -pip install -U llama_stack - -MODEL="Llama-4-Scout-17B-16E-Instruct" -# get meta url from llama.com -huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL - -# install dependencies for the distribution -llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install - -# start a llama stack server -INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu - -# install client to interact with the server -pip install llama-stack-client -``` -### CLI -```bash -# Run a chat completion -MODEL="Llama-4-Scout-17B-16E-Instruct" - -llama-stack-client --endpoint http://localhost:8321 \ -inference chat-completion \ ---model-id meta-llama/$MODEL \ ---message "write a haiku for meta's llama 4 models" - -OpenAIChatCompletion( - ... - choices=[ - OpenAIChatCompletionChoice( - finish_reason='stop', - index=0, - message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam( - role='assistant', - content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*', - ... - ), - ... - ) - ], - ... -) -``` -### Python SDK -```python -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient(base_url=f"http://localhost:8321") - -model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct" -prompt = "Write a haiku about coding" - -print(f"User> {prompt}") -response = client.chat.completions.create( - model=model_id, - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt}, - ], -) -print(f"Assistant> {response.choices[0].message.content}") -``` -As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned! - - -
- ### 🚀 One-Line Installer 🚀 To try Llama Stack locally, run: From a3580e6bc012535a43e0b08bfae4f6e6563a4bbd Mon Sep 17 00:00:00 2001 From: Anastas Stoyanovsky Date: Tue, 18 Nov 2025 14:25:08 -0500 Subject: [PATCH 43/62] feat!: Wire through parallel_tool_calls to Responses API (#4124) # What does this PR do? Initial PR against #4123 Adds `parallel_tool_calls` spec to Responses API and basic initial implementation where no more than one function call is generated when set to `False`. ## Test Plan * Unit tests have been added to verify no more than one function call is generated. * A followup PR will verify passing through `parallel_tool_calls` to providers. * A followup PR will address verification and/or implementation of incremental function calling across multiple conversational turns. --------- Signed-off-by: Anastas Stoyanovsky --- client-sdks/stainless/openapi.yml | 19 +++++++++++++------ docs/static/deprecated-llama-stack-spec.yaml | 19 +++++++++++++------ .../static/experimental-llama-stack-spec.yaml | 14 ++++++++------ docs/static/llama-stack-spec.yaml | 19 +++++++++++++------ docs/static/stainless-llama-stack-spec.yaml | 19 +++++++++++++------ .../inline/agents/meta_reference/agents.py | 2 ++ .../responses/openai_responses.py | 4 ++++ .../meta_reference/responses/streaming.py | 4 ++++ src/llama_stack_api/agents.py | 1 + src/llama_stack_api/openai_responses.py | 4 ++-- 10 files changed, 73 insertions(+), 32 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 3a6735cbc..a6ebc868c 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -6723,9 +6723,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -7125,6 +7126,11 @@ components: anyOf: - type: string - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -7251,9 +7257,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 0bade1866..207af8926 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -3566,9 +3566,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -3968,6 +3969,11 @@ components: anyOf: - type: string - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -4094,9 +4100,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 4271989d6..f81a93d33 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3263,9 +3263,10 @@ 
components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -3662,9 +3663,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a12ac342f..816f3d0fb 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5744,9 +5744,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -6146,6 +6147,11 @@ components: anyOf: - type: string - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -6272,9 +6278,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 3a6735cbc..a6ebc868c 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -6723,9 +6723,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -7125,6 +7126,11 @@ components: anyOf: - type: string - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string @@ -7251,9 +7257,10 @@ components: type: array title: Output parallel_tool_calls: - type: boolean - title: Parallel Tool Calls - default: false + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: anyOf: - type: string diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 347f6fdb1..e47e757be 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -92,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents): model: str, prompt: OpenAIResponsePrompt | None = None, instructions: str | None = None, + parallel_tool_calls: bool | None = True, previous_response_id: str | None = None, conversation: str | None = None, store: bool | None = True, @@ -120,6 +121,7 @@ class MetaReferenceAgentsImpl(Agents): include, max_infer_iters, guardrails, + parallel_tool_calls, max_tool_calls, ) return result # type: ignore[no-any-return] diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index cb0fe284e..7e080a675 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -252,6 +252,7 @@ class OpenAIResponsesImpl: include: 
list[str] | None = None, max_infer_iters: int | None = 10, guardrails: list[str | ResponseGuardrailSpec] | None = None, + parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, ): stream = bool(stream) @@ -296,6 +297,7 @@ class OpenAIResponsesImpl: tools=tools, max_infer_iters=max_infer_iters, guardrail_ids=guardrail_ids, + parallel_tool_calls=parallel_tool_calls, max_tool_calls=max_tool_calls, ) @@ -346,6 +348,7 @@ class OpenAIResponsesImpl: tools: list[OpenAIResponseInputTool] | None = None, max_infer_iters: int | None = 10, guardrail_ids: list[str] | None = None, + parallel_tool_calls: bool | None = True, max_tool_calls: int | None = None, ) -> AsyncIterator[OpenAIResponseObjectStream]: # These should never be None when called from create_openai_response (which sets defaults) @@ -385,6 +388,7 @@ class OpenAIResponsesImpl: created_at=created_at, text=text, max_infer_iters=max_infer_iters, + parallel_tool_calls=parallel_tool_calls, tool_executor=self.tool_executor, safety_api=self.safety_api, guardrail_ids=guardrail_ids, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 95c690147..cdbd87244 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -114,6 +114,7 @@ class StreamingResponseOrchestrator: safety_api, guardrail_ids: list[str] | None = None, prompt: OpenAIResponsePrompt | None = None, + parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, ): self.inference_api = inference_api @@ -128,6 +129,8 @@ class StreamingResponseOrchestrator: self.prompt = prompt # System message that is inserted into the model's context self.instructions = instructions + # Whether to allow more than one function tool call generated per turn. + self.parallel_tool_calls = parallel_tool_calls # Max number of total calls to built-in tools that can be processed in a response self.max_tool_calls = max_tool_calls self.sequence_number = 0 @@ -190,6 +193,7 @@ class StreamingResponseOrchestrator: usage=self.accumulated_usage, instructions=self.instructions, prompt=self.prompt, + parallel_tool_calls=self.parallel_tool_calls, max_tool_calls=self.max_tool_calls, ) diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py index ca0611746..9b767608a 100644 --- a/src/llama_stack_api/agents.py +++ b/src/llama_stack_api/agents.py @@ -72,6 +72,7 @@ class Agents(Protocol): model: str, prompt: OpenAIResponsePrompt | None = None, instructions: str | None = None, + parallel_tool_calls: bool | None = True, previous_response_id: str | None = None, conversation: str | None = None, store: bool | None = True, diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index 952418f1c..e20004487 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -585,7 +585,7 @@ class OpenAIResponseObject(BaseModel): :param model: Model identifier used for generation :param object: Object type identifier, always "response" :param output: List of generated output items (messages, tool calls, etc.) - :param parallel_tool_calls: Whether tool calls can be executed in parallel + :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn. 
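For illustration, the behavior this PR describes ("no more than one function call is generated when set to `False`") can be sketched standalone. The `ToolCall` type and `limit_tool_calls` helper below are stand-ins invented for this sketch, not the repo's actual classes; the real enforcement lives in the streaming orchestrator shown above.

```python
from dataclasses import dataclass


@dataclass
class ToolCall:
    name: str
    arguments: str


def limit_tool_calls(tool_calls: list[ToolCall], parallel_tool_calls: bool | None) -> list[ToolCall]:
    # None falls through to the new default (True): multiple calls allowed.
    if parallel_tool_calls is False:
        return tool_calls[:1]
    return tool_calls


calls = [ToolCall("get_weather", '{"city": "Paris"}'), ToolCall("get_time", '{"tz": "CET"}')]
assert len(limit_tool_calls(calls, parallel_tool_calls=False)) == 1
assert len(limit_tool_calls(calls, parallel_tool_calls=None)) == 2
```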
:param previous_response_id: (Optional) ID of the previous response in a conversation :param prompt: (Optional) Reference to a prompt template and its variables. :param status: Current status of the response generation @@ -605,7 +605,7 @@ class OpenAIResponseObject(BaseModel): model: str object: Literal["response"] = "response" output: Sequence[OpenAIResponseOutput] - parallel_tool_calls: bool = False + parallel_tool_calls: bool | None = True previous_response_id: str | None = None prompt: OpenAIResponsePrompt | None = None status: str From bd5ad2963e496e78f6e115dfc9910d55ce2121b5 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 18 Nov 2025 13:15:16 -0800 Subject: [PATCH 44/62] refactor(storage): make { kvstore, sqlstore } as llama stack "internal" APIs (#4181) These primitives (used both by the Stack as well as provider implementations) can be thought of fruitfully as internal-only APIs which can themselves have multiple implementations. We use the new `llama_stack_api.internal` namespace for this. In addition: the change moves kv/sql store impls, configs, and dependency helpers under `core/storage` ## Testing `pytest tests/unit/utils/test_authorized_sqlstore.py`, other existing CI --- pyproject.toml | 4 + .../core/conversations/conversations.py | 6 +- src/llama_stack/core/prompts/prompts.py | 2 +- src/llama_stack/core/server/quota.py | 4 +- src/llama_stack/core/stack.py | 4 +- .../storage}/kvstore/__init__.py | 2 + .../utils => core/storage}/kvstore/config.py | 0 .../utils => core/storage}/kvstore/kvstore.py | 30 ++-- .../storage}/kvstore/mongodb/__init__.py | 0 .../storage}/kvstore/mongodb/mongodb.py | 2 +- .../storage}/kvstore/postgres/__init__.py | 0 .../storage}/kvstore/postgres/postgres.py | 45 +++--- .../storage}/kvstore/redis/__init__.py | 0 .../storage}/kvstore/redis/redis.py | 57 +++++-- .../storage}/kvstore/sqlite/__init__.py | 0 .../storage}/kvstore/sqlite/sqlite.py | 2 +- .../core/storage/sqlstore/__init__.py | 17 +++ .../storage}/sqlstore/authorized_sqlstore.py | 4 +- .../storage}/sqlstore/sqlalchemy_sqlstore.py | 3 +- .../storage}/sqlstore/sqlstore.py | 3 +- src/llama_stack/core/store/registry.py | 2 +- .../distributions/starter/starter.py | 4 +- src/llama_stack/distributions/template.py | 8 +- .../inline/agents/meta_reference/agents.py | 2 +- .../inline/batches/reference/__init__.py | 2 +- .../inline/batches/reference/batches.py | 2 +- .../inline/datasetio/localfs/datasetio.py | 2 +- .../inline/eval/meta_reference/eval.py | 2 +- .../providers/inline/files/localfs/files.py | 6 +- .../providers/inline/vector_io/faiss/faiss.py | 4 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 4 +- src/llama_stack/providers/registry/agents.py | 2 +- src/llama_stack/providers/registry/files.py | 2 +- .../datasetio/huggingface/huggingface.py | 2 +- .../providers/remote/files/openai/files.py | 6 +- .../providers/remote/files/s3/files.py | 6 +- .../remote/vector_io/chroma/chroma.py | 4 +- .../remote/vector_io/milvus/milvus.py | 4 +- .../remote/vector_io/pgvector/pgvector.py | 4 +- .../remote/vector_io/qdrant/qdrant.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 4 +- .../utils/inference/inference_store.py | 7 +- .../providers/utils/kvstore/sqlite/config.py | 20 --- .../utils/memory/openai_vector_store_mixin.py | 2 +- .../utils/responses/responses_store.py | 7 +- .../providers/utils/sqlstore/api.py | 140 ------------------ .../internal}/__init__.py | 4 + .../internal/kvstore.py} | 5 + src/llama_stack_api/internal/sqlstore.py | 79 ++++++++++ tests/integration/files/test_files.py 
| 6 +- .../sqlstore/test_authorized_sqlstore.py | 10 +- .../unit/conversations/test_conversations.py | 2 +- tests/unit/files/test_files.py | 2 +- tests/unit/fixtures.py | 4 +- tests/unit/prompts/prompts/conftest.py | 2 +- .../meta_reference/test_openai_responses.py | 2 +- tests/unit/providers/batches/conftest.py | 2 +- tests/unit/providers/files/conftest.py | 2 +- .../providers/files/test_s3_files_auth.py | 16 +- tests/unit/providers/vector_io/conftest.py | 4 +- tests/unit/registry/test_registry.py | 2 +- tests/unit/server/test_quota.py | 2 +- tests/unit/server/test_resolver.py | 4 +- .../utils/inference/test_inference_store.py | 2 +- .../unit/utils/kvstore/test_sqlite_memory.py | 4 +- .../utils/responses/test_responses_store.py | 2 +- tests/unit/utils/sqlstore/test_sqlstore.py | 6 +- tests/unit/utils/test_authorized_sqlstore.py | 14 +- 68 files changed, 302 insertions(+), 309 deletions(-) rename src/llama_stack/{providers/utils => core/storage}/kvstore/__init__.py (78%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/config.py (100%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/kvstore.py (82%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/mongodb/__init__.py (100%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/mongodb/mongodb.py (98%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/postgres/__init__.py (100%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/postgres/postgres.py (73%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/redis/__init__.py (100%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/redis/redis.py (54%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/sqlite/__init__.py (100%) rename src/llama_stack/{providers/utils => core/storage}/kvstore/sqlite/sqlite.py (99%) create mode 100644 src/llama_stack/core/storage/sqlstore/__init__.py rename src/llama_stack/{providers/utils => core/storage}/sqlstore/authorized_sqlstore.py (99%) rename src/llama_stack/{providers/utils => core/storage}/sqlstore/sqlalchemy_sqlstore.py (99%) rename src/llama_stack/{providers/utils => core/storage}/sqlstore/sqlstore.py (98%) delete mode 100644 src/llama_stack/providers/utils/kvstore/sqlite/config.py delete mode 100644 src/llama_stack/providers/utils/sqlstore/api.py rename src/{llama_stack/providers/utils/sqlstore => llama_stack_api/internal}/__init__.py (65%) rename src/{llama_stack/providers/utils/kvstore/api.py => llama_stack_api/internal/kvstore.py} (89%) create mode 100644 src/llama_stack_api/internal/sqlstore.py diff --git a/pyproject.toml b/pyproject.toml index eea515b09..3e16dc08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -356,6 +356,10 @@ exclude = [ module = [ "yaml", "fire", + "redis.asyncio", + "psycopg2", + "psycopg2.extras", + "psycopg2.extensions", "torchtune.*", "fairscale.*", "torchvision.*", diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 4cf5a82ee..90402439b 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -11,10 +11,9 @@ from typing import Any, Literal from pydantic import BaseModel, TypeAdapter from llama_stack.core.datatypes import AccessRule, StackRunConfig +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.log import get_logger -from 
llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( Conversation, ConversationDeletedResource, @@ -25,6 +24,7 @@ from llama_stack_api import ( Conversations, Metadata, ) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType logger = get_logger(name=__name__, category="openai_conversations") diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index 9f532c1cd..ff67ad138 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -10,7 +10,7 @@ from typing import Any from pydantic import BaseModel from llama_stack.core.datatypes import StackRunConfig -from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl +from llama_stack.core.storage.kvstore import KVStore, kvstore_impl from llama_stack_api import ListPromptsResponse, Prompt, Prompts diff --git a/src/llama_stack/core/server/quota.py b/src/llama_stack/core/server/quota.py index 689f0e4c3..d74d3e89d 100644 --- a/src/llama_stack/core/server/quota.py +++ b/src/llama_stack/core/server/quota.py @@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta from starlette.types import ASGIApp, Receive, Scope, Send from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType +from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl +from llama_stack_api.internal.kvstore import KVStore logger = get_logger(name=__name__, category="core::server") diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 00d990cb1..8ba1f2afd 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -385,8 +385,8 @@ def _initialize_storage(run_config: StackRunConfig): else: raise ValueError(f"Unknown storage backend type: {type}") - from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends - from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends + from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends register_kvstore_backends(kv_backends) register_sqlstore_backends(sql_backends) diff --git a/src/llama_stack/providers/utils/kvstore/__init__.py b/src/llama_stack/core/storage/kvstore/__init__.py similarity index 78% rename from src/llama_stack/providers/utils/kvstore/__init__.py rename to src/llama_stack/core/storage/kvstore/__init__.py index 470a75d2d..2d60f1508 100644 --- a/src/llama_stack/providers/utils/kvstore/__init__.py +++ b/src/llama_stack/core/storage/kvstore/__init__.py @@ -4,4 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
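For orientation, the `KVStore` interface that call sites now import via `llama_stack.core.storage.kvstore` is a small async protocol. Below is a self-contained sketch: the protocol body mirrors the methods in the diff, while `MemoryKV` and `demo` are illustrative names for this snippet, not repo APIs.

```python
import asyncio
from datetime import datetime
from typing import Protocol


class KVStore(Protocol):
    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ...
    async def get(self, key: str) -> str | None: ...
    async def delete(self, key: str) -> None: ...
    async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ...
    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...


class MemoryKV:
    """In-memory implementation, analogous to InmemoryKVStoreImpl in the diff."""

    def __init__(self) -> None:
        self._store: dict[str, str] = {}

    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
        self._store[key] = value

    async def get(self, key: str) -> str | None:
        return self._store.get(key)

    async def delete(self, key: str) -> None:
        self._store.pop(key, None)

    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
        # Half-open range, matching InmemoryKVStoreImpl.values_in_range.
        return [v for k, v in sorted(self._store.items()) if start_key <= k < end_key]

    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
        # Inclusive end, matching InmemoryKVStoreImpl.keys_in_range.
        return [k for k in sorted(self._store) if start_key <= k <= end_key]


async def demo() -> None:
    kv: KVStore = MemoryKV()
    await kv.set("responses:1", "hello")
    assert await kv.get("responses:1") == "hello"
    assert await kv.keys_in_range("responses:", "responses;") == ["responses:1"]


asyncio.run(demo())
```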
+from llama_stack_api.internal.kvstore import KVStore as KVStore + from .kvstore import * # noqa: F401, F403 diff --git a/src/llama_stack/providers/utils/kvstore/config.py b/src/llama_stack/core/storage/kvstore/config.py similarity index 100% rename from src/llama_stack/providers/utils/kvstore/config.py rename to src/llama_stack/core/storage/kvstore/config.py diff --git a/src/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/core/storage/kvstore/kvstore.py similarity index 82% rename from src/llama_stack/providers/utils/kvstore/kvstore.py rename to src/llama_stack/core/storage/kvstore/kvstore.py index 5b8d77102..8ea9282fa 100644 --- a/src/llama_stack/providers/utils/kvstore/kvstore.py +++ b/src/llama_stack/core/storage/kvstore/kvstore.py @@ -13,11 +13,19 @@ from __future__ import annotations import asyncio from collections import defaultdict +from datetime import datetime +from typing import cast -from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig +from llama_stack_api.internal.kvstore import KVStore -from .api import KVStore -from .config import KVStoreConfig +from .config import ( + KVStoreConfig, + MongoDBKVStoreConfig, + PostgresKVStoreConfig, + RedisKVStoreConfig, + SqliteKVStoreConfig, +) def kvstore_dependencies(): @@ -33,7 +41,7 @@ def kvstore_dependencies(): class InmemoryKVStoreImpl(KVStore): def __init__(self): - self._store = {} + self._store: dict[str, str] = {} async def initialize(self) -> None: pass @@ -41,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore): async def get(self, key: str) -> str | None: return self._store.get(key) - async def set(self, key: str, value: str) -> None: + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: self._store[key] = value async def values_in_range(self, start_key: str, end_key: str) -> list[str]: @@ -70,7 +78,8 @@ def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None _KVSTORE_INSTANCES.clear() _KVSTORE_LOCKS.clear() for name, cfg in backends.items(): - _KVSTORE_BACKENDS[name] = cfg + typed_cfg = cast(KVStoreConfig, cfg) + _KVSTORE_BACKENDS[name] = typed_cfg async def kvstore_impl(reference: KVStoreReference) -> KVStore: @@ -94,19 +103,20 @@ async def kvstore_impl(reference: KVStoreReference) -> KVStore: config = backend_config.model_copy() config.namespace = reference.namespace - if config.type == StorageBackendType.KV_REDIS.value: + impl: KVStore + if isinstance(config, RedisKVStoreConfig): from .redis import RedisKVStoreImpl impl = RedisKVStoreImpl(config) - elif config.type == StorageBackendType.KV_SQLITE.value: + elif isinstance(config, SqliteKVStoreConfig): from .sqlite import SqliteKVStoreImpl impl = SqliteKVStoreImpl(config) - elif config.type == StorageBackendType.KV_POSTGRES.value: + elif isinstance(config, PostgresKVStoreConfig): from .postgres import PostgresKVStoreImpl impl = PostgresKVStoreImpl(config) - elif config.type == StorageBackendType.KV_MONGODB.value: + elif isinstance(config, MongoDBKVStoreConfig): from .mongodb import MongoDBKVStoreImpl impl = MongoDBKVStoreImpl(config) diff --git a/src/llama_stack/providers/utils/kvstore/mongodb/__init__.py b/src/llama_stack/core/storage/kvstore/mongodb/__init__.py similarity index 100% rename from src/llama_stack/providers/utils/kvstore/mongodb/__init__.py rename to src/llama_stack/core/storage/kvstore/mongodb/__init__.py diff --git 
a/src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/src/llama_stack/core/storage/kvstore/mongodb/mongodb.py similarity index 98% rename from src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py rename to src/llama_stack/core/storage/kvstore/mongodb/mongodb.py index 964c45090..673d6038f 100644 --- a/src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py +++ b/src/llama_stack/core/storage/kvstore/mongodb/mongodb.py @@ -9,8 +9,8 @@ from datetime import datetime from pymongo import AsyncMongoClient from pymongo.asynchronous.collection import AsyncCollection +from llama_stack.core.storage.kvstore import KVStore from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore from ..config import MongoDBKVStoreConfig diff --git a/src/llama_stack/providers/utils/kvstore/postgres/__init__.py b/src/llama_stack/core/storage/kvstore/postgres/__init__.py similarity index 100% rename from src/llama_stack/providers/utils/kvstore/postgres/__init__.py rename to src/llama_stack/core/storage/kvstore/postgres/__init__.py diff --git a/src/llama_stack/providers/utils/kvstore/postgres/postgres.py b/src/llama_stack/core/storage/kvstore/postgres/postgres.py similarity index 73% rename from src/llama_stack/providers/utils/kvstore/postgres/postgres.py rename to src/llama_stack/core/storage/kvstore/postgres/postgres.py index 56d6dbb48..39c3fd2e2 100644 --- a/src/llama_stack/providers/utils/kvstore/postgres/postgres.py +++ b/src/llama_stack/core/storage/kvstore/postgres/postgres.py @@ -6,12 +6,13 @@ from datetime import datetime -import psycopg2 -from psycopg2.extras import DictCursor +import psycopg2 # type: ignore[import-not-found] +from psycopg2.extensions import connection as PGConnection # type: ignore[import-not-found] +from psycopg2.extras import DictCursor # type: ignore[import-not-found] from llama_stack.log import get_logger +from llama_stack_api.internal.kvstore import KVStore -from ..api import KVStore from ..config import PostgresKVStoreConfig log = get_logger(name=__name__, category="providers::utils") @@ -20,12 +21,12 @@ log = get_logger(name=__name__, category="providers::utils") class PostgresKVStoreImpl(KVStore): def __init__(self, config: PostgresKVStoreConfig): self.config = config - self.conn = None - self.cursor = None + self._conn: PGConnection | None = None + self._cursor: DictCursor | None = None async def initialize(self) -> None: try: - self.conn = psycopg2.connect( + self._conn = psycopg2.connect( host=self.config.host, port=self.config.port, database=self.config.db, @@ -34,11 +35,11 @@ class PostgresKVStoreImpl(KVStore): sslmode=self.config.ssl_mode, sslrootcert=self.config.ca_cert_path, ) - self.conn.autocommit = True - self.cursor = self.conn.cursor(cursor_factory=DictCursor) + self._conn.autocommit = True + self._cursor = self._conn.cursor(cursor_factory=DictCursor) # Create table if it doesn't exist - self.cursor.execute( + self._cursor.execute( f""" CREATE TABLE IF NOT EXISTS {self.config.table_name} ( key TEXT PRIMARY KEY, @@ -51,6 +52,11 @@ class PostgresKVStoreImpl(KVStore): log.exception("Could not connect to PostgreSQL database server") raise RuntimeError("Could not connect to PostgreSQL database server") from e + def _cursor_or_raise(self) -> DictCursor: + if self._cursor is None: + raise RuntimeError("Postgres client not initialized") + return self._cursor + def _namespaced_key(self, key: str) -> str: if not self.config.namespace: return key @@ -58,7 +64,8 @@ class PostgresKVStoreImpl(KVStore): async def set(self, key: str, 
value: str, expiration: datetime | None = None) -> None: key = self._namespaced_key(key) - self.cursor.execute( + cursor = self._cursor_or_raise() + cursor.execute( f""" INSERT INTO {self.config.table_name} (key, value, expiration) VALUES (%s, %s, %s) @@ -70,7 +77,8 @@ class PostgresKVStoreImpl(KVStore): async def get(self, key: str) -> str | None: key = self._namespaced_key(key) - self.cursor.execute( + cursor = self._cursor_or_raise() + cursor.execute( f""" SELECT value FROM {self.config.table_name} WHERE key = %s @@ -78,12 +86,13 @@ class PostgresKVStoreImpl(KVStore): """, (key,), ) - result = self.cursor.fetchone() + result = cursor.fetchone() return result[0] if result else None async def delete(self, key: str) -> None: key = self._namespaced_key(key) - self.cursor.execute( + cursor = self._cursor_or_raise() + cursor.execute( f"DELETE FROM {self.config.table_name} WHERE key = %s", (key,), ) @@ -92,7 +101,8 @@ class PostgresKVStoreImpl(KVStore): start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) - self.cursor.execute( + cursor = self._cursor_or_raise() + cursor.execute( f""" SELECT value FROM {self.config.table_name} WHERE key >= %s AND key < %s @@ -101,14 +111,15 @@ class PostgresKVStoreImpl(KVStore): """, (start_key, end_key), ) - return [row[0] for row in self.cursor.fetchall()] + return [row[0] for row in cursor.fetchall()] async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) - self.cursor.execute( + cursor = self._cursor_or_raise() + cursor.execute( f"SELECT key FROM {self.config.table_name} WHERE key >= %s AND key < %s", (start_key, end_key), ) - return [row[0] for row in self.cursor.fetchall()] + return [row[0] for row in cursor.fetchall()] diff --git a/src/llama_stack/providers/utils/kvstore/redis/__init__.py b/src/llama_stack/core/storage/kvstore/redis/__init__.py similarity index 100% rename from src/llama_stack/providers/utils/kvstore/redis/__init__.py rename to src/llama_stack/core/storage/kvstore/redis/__init__.py diff --git a/src/llama_stack/providers/utils/kvstore/redis/redis.py b/src/llama_stack/core/storage/kvstore/redis/redis.py similarity index 54% rename from src/llama_stack/providers/utils/kvstore/redis/redis.py rename to src/llama_stack/core/storage/kvstore/redis/redis.py index 3d2d956c3..2b35a22e1 100644 --- a/src/llama_stack/providers/utils/kvstore/redis/redis.py +++ b/src/llama_stack/core/storage/kvstore/redis/redis.py @@ -6,18 +6,25 @@ from datetime import datetime -from redis.asyncio import Redis +from redis.asyncio import Redis # type: ignore[import-not-found] + +from llama_stack_api.internal.kvstore import KVStore -from ..api import KVStore from ..config import RedisKVStoreConfig class RedisKVStoreImpl(KVStore): def __init__(self, config: RedisKVStoreConfig): self.config = config + self._redis: Redis | None = None async def initialize(self) -> None: - self.redis = Redis.from_url(self.config.url) + self._redis = Redis.from_url(self.config.url) + + def _client(self) -> Redis: + if self._redis is None: + raise RuntimeError("Redis client not initialized") + return self._redis def _namespaced_key(self, key: str) -> str: if not self.config.namespace: @@ -26,30 +33,37 @@ class RedisKVStoreImpl(KVStore): async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: key = self._namespaced_key(key) - await self.redis.set(key, value) + client = self._client() + await client.set(key, value) if expiration: - 
await self.redis.expireat(key, expiration) + await client.expireat(key, expiration) async def get(self, key: str) -> str | None: key = self._namespaced_key(key) - value = await self.redis.get(key) + client = self._client() + value = await client.get(key) if value is None: return None - await self.redis.ttl(key) - return value + await client.ttl(key) + if isinstance(value, bytes): + return value.decode("utf-8") + if isinstance(value, str): + return value + return str(value) async def delete(self, key: str) -> None: key = self._namespaced_key(key) - await self.redis.delete(key) + await self._client().delete(key) async def values_in_range(self, start_key: str, end_key: str) -> list[str]: start_key = self._namespaced_key(start_key) end_key = self._namespaced_key(end_key) + client = self._client() cursor = 0 pattern = start_key + "*" # Match all keys starting with start_key prefix - matching_keys = [] + matching_keys: list[str | bytes] = [] while True: - cursor, keys = await self.redis.scan(cursor, match=pattern, count=1000) + cursor, keys = await client.scan(cursor, match=pattern, count=1000) for key in keys: key_str = key.decode("utf-8") if isinstance(key, bytes) else key @@ -61,7 +75,7 @@ class RedisKVStoreImpl(KVStore): # Then fetch all values in a single MGET call if matching_keys: - values = await self.redis.mget(matching_keys) + values = await client.mget(matching_keys) return [ value.decode("utf-8") if isinstance(value, bytes) else value for value in values if value is not None ] @@ -70,7 +84,18 @@ class RedisKVStoreImpl(KVStore): async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: """Get all keys in the given range.""" - matching_keys = await self.redis.zrangebylex(self.namespace, f"[{start_key}", f"[{end_key}") - if not matching_keys: - return [] - return [k.decode("utf-8") for k in matching_keys] + start_key = self._namespaced_key(start_key) + end_key = self._namespaced_key(end_key) + client = self._client() + cursor = 0 + pattern = start_key + "*" + result: list[str] = [] + while True: + cursor, keys = await client.scan(cursor, match=pattern, count=1000) + for key in keys: + key_str = key.decode("utf-8") if isinstance(key, bytes) else str(key) + if start_key <= key_str <= end_key: + result.append(key_str) + if cursor == 0: + break + return result diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/__init__.py b/src/llama_stack/core/storage/kvstore/sqlite/__init__.py similarity index 100% rename from src/llama_stack/providers/utils/kvstore/sqlite/__init__.py rename to src/llama_stack/core/storage/kvstore/sqlite/__init__.py diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py b/src/llama_stack/core/storage/kvstore/sqlite/sqlite.py similarity index 99% rename from src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py rename to src/llama_stack/core/storage/kvstore/sqlite/sqlite.py index a9a7a1304..22cf8ac49 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py +++ b/src/llama_stack/core/storage/kvstore/sqlite/sqlite.py @@ -10,8 +10,8 @@ from datetime import datetime import aiosqlite from llama_stack.log import get_logger +from llama_stack_api.internal.kvstore import KVStore -from ..api import KVStore from ..config import SqliteKVStoreConfig logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/core/storage/sqlstore/__init__.py b/src/llama_stack/core/storage/sqlstore/__init__.py new file mode 100644 index 000000000..eb843e4ba --- /dev/null +++ 
b/src/llama_stack/core/storage/sqlstore/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api.internal.sqlstore import ( + ColumnDefinition as ColumnDefinition, +) +from llama_stack_api.internal.sqlstore import ( + ColumnType as ColumnType, +) +from llama_stack_api.internal.sqlstore import ( + SqlStore as SqlStore, +) + +from .sqlstore import * # noqa: F401,F403 diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py similarity index 99% rename from src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py rename to src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py index ba95dd120..e6cdcc543 100644 --- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py @@ -14,8 +14,8 @@ from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.storage.datatypes import StorageBackendType from llama_stack.log import get_logger - -from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore +from llama_stack_api import PaginatedResponse +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py similarity index 99% rename from src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py rename to src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py index 10009d396..01c561443 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py @@ -29,8 +29,7 @@ from sqlalchemy.sql.elements import ColumnElement from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger from llama_stack_api import PaginatedResponse - -from .api import ColumnDefinition, ColumnType, SqlStore +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/core/storage/sqlstore/sqlstore.py similarity index 98% rename from src/llama_stack/providers/utils/sqlstore/sqlstore.py rename to src/llama_stack/core/storage/sqlstore/sqlstore.py index 9409b7d00..fb2c9d279 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/src/llama_stack/core/storage/sqlstore/sqlstore.py @@ -16,8 +16,7 @@ from llama_stack.core.storage.datatypes import ( StorageBackendConfig, StorageBackendType, ) - -from .api import SqlStore +from llama_stack_api.internal.sqlstore import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] diff --git a/src/llama_stack/core/store/registry.py b/src/llama_stack/core/store/registry.py index 6ff9e575b..7144a94f7 100644 --- a/src/llama_stack/core/store/registry.py +++ b/src/llama_stack/core/store/registry.py @@ -12,8 +12,8 @@ import pydantic from llama_stack.core.datatypes import RoutableObjectWithProvider from llama_stack.core.storage.datatypes import KVStoreReference +from 
llama_stack.core.storage.kvstore import KVStore, kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl logger = get_logger(__name__, category="core::registry") diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 4c21a8c99..32264eebb 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,6 +17,8 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) +from llama_stack.core.storage.kvstore.config import PostgresKVStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig @@ -35,8 +37,6 @@ from llama_stack.providers.remote.vector_io.pgvector.config import ( ) from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig -from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack_api import RemoteProviderSpec diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index 5755a26de..90b458805 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -35,13 +35,13 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageBackendType, ) +from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.kvstore.config import get_pip_packages as get_kv_pip_packages +from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages from llama_stack_api import DatasetPurpose, ModelType diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index e47e757be..ba83a9576 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -6,8 +6,8 @@ from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack_api import ( Agents, diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py 
b/src/llama_stack/providers/inline/batches/reference/__init__.py index 11c4b06a9..b48c82864 100644 --- a/src/llama_stack/providers/inline/batches/reference/__init__.py +++ b/src/llama_stack/providers/inline/batches/reference/__init__.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.core.datatypes import AccessRule, Api -from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack_api import Files, Inference, Models from .batches import ReferenceBatchesImpl diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py index 73727799d..aaa2c7b22 100644 --- a/src/llama_stack/providers/inline/batches/reference/batches.py +++ b/src/llama_stack/providers/inline/batches/reference/batches.py @@ -16,8 +16,8 @@ from typing import Any, Literal from openai.types.batch import BatchError, Errors from pydantic import BaseModel +from llama_stack.core.storage.kvstore import KVStore from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore from llama_stack_api import ( Batches, BatchObject, diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py index 6ab1a540f..85c7cff3e 100644 --- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -5,8 +5,8 @@ # the root directory of this source tree. from typing import Any +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri -from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py index d43e569e2..0f0cb84d6 100644 --- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -8,8 +8,8 @@ from typing import Any from tqdm import tqdm +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.providers.utils.common.data_schema_validator import ColumnName -from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack_api import ( Agents, Benchmark, diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py index 5fb35a378..2afe2fe5e 100644 --- a/src/llama_stack/providers/inline/files/localfs/files.py +++ b/src/llama_stack/providers/inline/files/localfs/files.py @@ -13,11 +13,10 @@ from fastapi import Depends, File, Form, Response, UploadFile from llama_stack.core.datatypes import AccessRule from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import 
sqlstore_impl from llama_stack_api import ( ExpiresAfter, Files, @@ -28,6 +27,7 @@ from llama_stack_api import ( Order, ResourceNotFoundError, ) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType from .config import LocalfsFilesImplConfig diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index d52a54e6a..91a17058b 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -14,9 +14,8 @@ import faiss # type: ignore[import-untyped] import numpy as np from numpy.typing import NDArray +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( @@ -32,6 +31,7 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore from .config import FaissVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 74bc349a5..a384a33dc 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -14,9 +14,8 @@ import numpy as np import sqlite_vec # type: ignore[import-untyped] from numpy.typing import NDArray +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, @@ -35,6 +34,7 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore logger = get_logger(name=__name__, category="vector_io") diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py index 455be1ae7..2c68750a6 100644 --- a/src/llama_stack/providers/registry/agents.py +++ b/src/llama_stack/providers/registry/agents.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.providers.utils.kvstore import kvstore_dependencies +from llama_stack.core.storage.kvstore import kvstore_dependencies from llama_stack_api import ( Api, InlineProviderSpec, diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py index 024254b57..8ce8acd91 100644 --- a/src/llama_stack/providers/registry/files.py +++ b/src/llama_stack/providers/registry/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages +from llama_stack.core.storage.sqlstore.sqlstore import sql_store_pip_packages from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 72069f716..26390a63b 100644 --- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -6,7 +6,7 @@ from typing import Any from urllib.parse import parse_qs, urlparse -from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py index d2f5a08eb..2cfd44168 100644 --- a/src/llama_stack/providers/remote/files/openai/files.py +++ b/src/llama_stack/providers/remote/files/openai/files.py @@ -10,10 +10,9 @@ from typing import Annotated, Any from fastapi import Depends, File, Form, Response, UploadFile from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( ExpiresAfter, Files, @@ -24,6 +23,7 @@ from llama_stack_api import ( Order, ResourceNotFoundError, ) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType from openai import OpenAI from .config import OpenAIFilesImplConfig diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py index 68822eb77..3c1c82fa0 100644 --- a/src/llama_stack/providers/remote/files/s3/files.py +++ b/src/llama_stack/providers/remote/files/s3/files.py @@ -19,10 +19,9 @@ if TYPE_CHECKING: from llama_stack.core.datatypes import AccessRule from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( ExpiresAfter, Files, @@ -33,6 +32,7 @@ from llama_stack_api import ( Order, ResourceNotFoundError, ) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType from .config import S3FilesImplConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 645b40661..491db6d4d 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ 
b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -11,10 +11,9 @@ from urllib.parse import urlparse import chromadb from numpy.typing import NDArray +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( @@ -27,6 +26,7 @@ from llama_stack_api import ( VectorStore, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index aefa20317..044d678fa 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,10 +11,9 @@ from typing import Any from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, @@ -34,6 +33,7 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 2901bad97..5c86fb08d 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,10 +13,9 @@ from psycopg2 import sql from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name @@ -31,6 +30,7 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py 
b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 20ab653d0..4dd78d834 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,9 +13,9 @@ from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index ba3e6b7ea..c15d5f468 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -13,9 +13,8 @@ from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, @@ -35,6 +34,7 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) +from llama_stack_api.internal.kvstore import KVStore from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 49e3af7a1..a8a0cace4 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -10,6 +10,8 @@ from sqlalchemy.exc import IntegrityError from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIChatCompletionResponse, @@ -18,10 +20,7 @@ from llama_stack_api import ( OpenAIMessageParam, Order, ) - -from ..sqlstore.api import ColumnDefinition, ColumnType -from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType logger = get_logger(name=__name__, category="inference") diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py deleted file mode 100644 index 0f8fa0a95..000000000 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pydantic import BaseModel, Field - -from llama_stack_api import json_schema_type - - -@json_schema_type -class SqliteControlPlaneConfig(BaseModel): - db_path: str = Field( - description="File path for the sqlite database", - ) - table_name: str = Field( - default="llamastack_control_plane", - description="Table into which all the keys will be placed", - ) diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 540ff5940..bbfd60e25 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.vector_store import ( ChunkForDeletion, content_from_data_and_mime_type, @@ -53,6 +52,7 @@ from llama_stack_api import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) +from llama_stack_api.internal.kvstore import KVStore EMBEDDING_DIMENSION = 768 diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index f6e7c435d..0401db206 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -6,6 +6,8 @@ from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIResponseInputItem, @@ -17,10 +19,7 @@ from llama_stack_api import ( OpenAIResponseObjectWithInput, Order, ) - -from ..sqlstore.api import ColumnDefinition, ColumnType -from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import sqlstore_impl +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType logger = get_logger(name=__name__, category="openai_responses") diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py deleted file mode 100644 index 708fc7095..000000000 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import Mapping, Sequence -from enum import Enum -from typing import Any, Literal, Protocol - -from pydantic import BaseModel - -from llama_stack_api import PaginatedResponse - - -class ColumnType(Enum): - INTEGER = "INTEGER" - STRING = "STRING" - TEXT = "TEXT" - FLOAT = "FLOAT" - BOOLEAN = "BOOLEAN" - JSON = "JSON" - DATETIME = "DATETIME" - - -class ColumnDefinition(BaseModel): - type: ColumnType - primary_key: bool = False - nullable: bool = True - default: Any = None - - -class SqlStore(Protocol): - """ - A protocol for a SQL store. 
- """ - - async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: - """ - Create a table. - """ - pass - - async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: - """ - Insert a row or batch of rows into a table. - """ - pass - - async def upsert( - self, - table: str, - data: Mapping[str, Any], - conflict_columns: list[str], - update_columns: list[str] | None = None, - ) -> None: - """ - Insert a row and update specified columns when conflicts occur. - """ - pass - - async def fetch_all( - self, - table: str, - where: Mapping[str, Any] | None = None, - where_sql: str | None = None, - limit: int | None = None, - order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, - cursor: tuple[str, str] | None = None, - ) -> PaginatedResponse: - """ - Fetch all rows from a table with optional cursor-based pagination. - - :param table: The table name - :param where: Simple key-value WHERE conditions - :param where_sql: Raw SQL WHERE clause for complex queries - :param limit: Maximum number of records to return - :param order_by: List of (column, order) tuples for sorting - :param cursor: Tuple of (key_column, cursor_id) for pagination (None for first page) - Requires order_by with exactly one column when used - :return: PaginatedResult with data and has_more flag - - Note: Cursor pagination only supports single-column ordering for simplicity. - Multi-column ordering is allowed without cursor but will raise an error with cursor. - """ - pass - - async def fetch_one( - self, - table: str, - where: Mapping[str, Any] | None = None, - where_sql: str | None = None, - order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, - ) -> dict[str, Any] | None: - """ - Fetch one row from a table. - """ - pass - - async def update( - self, - table: str, - data: Mapping[str, Any], - where: Mapping[str, Any], - ) -> None: - """ - Update a row in a table. - """ - pass - - async def delete( - self, - table: str, - where: Mapping[str, Any], - ) -> None: - """ - Delete a row from a table. - """ - pass - - async def add_column_if_not_exists( - self, - table: str, - column_name: str, - column_type: ColumnType, - nullable: bool = True, - ) -> None: - """ - Add a column to an existing table if the column doesn't already exist. - - This is useful for table migrations when adding new functionality. - If the table doesn't exist, this method should do nothing. - If the column already exists, this method should do nothing. - - :param table: Table name - :param column_name: Name of the column to add - :param column_type: Type of the column to add - :param nullable: Whether the column should be nullable (default: True) - """ - pass diff --git a/src/llama_stack/providers/utils/sqlstore/__init__.py b/src/llama_stack_api/internal/__init__.py similarity index 65% rename from src/llama_stack/providers/utils/sqlstore/__init__.py rename to src/llama_stack_api/internal/__init__.py index 756f351d8..bbf7010c3 100644 --- a/src/llama_stack/providers/utils/sqlstore/__init__.py +++ b/src/llama_stack_api/internal/__init__.py @@ -3,3 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +# Internal subpackage for shared interfaces that are not part of the public API. 
+ +__all__: list[str] = [] diff --git a/src/llama_stack/providers/utils/kvstore/api.py b/src/llama_stack_api/internal/kvstore.py similarity index 89% rename from src/llama_stack/providers/utils/kvstore/api.py rename to src/llama_stack_api/internal/kvstore.py index d17dc66e1..a6d982261 100644 --- a/src/llama_stack/providers/utils/kvstore/api.py +++ b/src/llama_stack_api/internal/kvstore.py @@ -9,6 +9,8 @@ from typing import Protocol class KVStore(Protocol): + """Protocol for simple key/value storage backends.""" + # TODO: make the value type bytes instead of str async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ... @@ -19,3 +21,6 @@ class KVStore(Protocol): async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ... async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ... + + +__all__ = ["KVStore"] diff --git a/src/llama_stack_api/internal/sqlstore.py b/src/llama_stack_api/internal/sqlstore.py new file mode 100644 index 000000000..ebb2d8ba2 --- /dev/null +++ b/src/llama_stack_api/internal/sqlstore.py @@ -0,0 +1,79 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Mapping, Sequence +from enum import Enum +from typing import Any, Literal, Protocol + +from pydantic import BaseModel + +from llama_stack_api import PaginatedResponse + + +class ColumnType(Enum): + INTEGER = "INTEGER" + STRING = "STRING" + TEXT = "TEXT" + FLOAT = "FLOAT" + BOOLEAN = "BOOLEAN" + JSON = "JSON" + DATETIME = "DATETIME" + + +class ColumnDefinition(BaseModel): + type: ColumnType + primary_key: bool = False + nullable: bool = True + default: Any = None + + +class SqlStore(Protocol): + """Protocol for common SQL-store functionality.""" + + async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: ... + + async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: ... + + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: ... + + async def fetch_all( + self, + table: str, + where: Mapping[str, Any] | None = None, + where_sql: str | None = None, + limit: int | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + cursor: tuple[str, str] | None = None, + ) -> PaginatedResponse: ... + + async def fetch_one( + self, + table: str, + where: Mapping[str, Any] | None = None, + where_sql: str | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> dict[str, Any] | None: ... + + async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None: ... + + async def delete(self, table: str, where: Mapping[str, Any]) -> None: ... + + async def add_column_if_not_exists( + self, + table: str, + column_name: str, + column_type: ColumnType, + nullable: bool = True, + ) -> None: ... 
+ + +__all__ = ["ColumnDefinition", "ColumnType", "SqlStore"] diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 1f19c88c5..e8004c95d 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -175,7 +175,7 @@ def test_expires_after_requests(openai_client): @pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.") -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") +@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client): """Test that users can only access their own files.""" from llama_stack_client import NotFoundError @@ -275,7 +275,7 @@ def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack raise e -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") +@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") def test_files_authentication_shared_attributes( mock_get_authenticated_user, llama_stack_client, provider_type_is_openai ): @@ -335,7 +335,7 @@ def test_files_authentication_shared_attributes( raise e -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") +@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") def test_files_authentication_anonymous_access( mock_get_authenticated_user, llama_stack_client, provider_type_is_openai ): diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index ad9115756..4f4f4a8dd 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -13,14 +13,14 @@ import pytest from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import User from llama_stack.core.storage.datatypes import SqlStoreReference -from llama_stack.providers.utils.sqlstore.api import ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import ( +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import ( PostgresSqlStoreConfig, SqliteSqlStoreConfig, register_sqlstore_backends, sqlstore_impl, ) +from llama_stack_api.internal.sqlstore import ColumnType def get_postgres_config(): @@ -96,7 +96,7 @@ async def cleanup_records(sql_store, table_name, record_ids): @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") +@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_store_attributes(mock_get_authenticated_user, authorized_store, request): """Test that JSON column comparisons work correctly for both PostgreSQL and SQLite""" backend_name = request.node.callspec.id @@ -190,7 +190,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") 
+@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_user_ownership_policy(mock_get_authenticated_user, authorized_store, request): """Test that 'user is owner' policies work correctly with record ownership""" from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 95c54d379..e8286576b 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -23,7 +23,7 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index 793f4edd3..197038349 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -9,11 +9,11 @@ import pytest from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImpl, LocalfsFilesImplConfig, ) -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 443a1d371..9e049f8da 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -6,9 +6,9 @@ import pytest +from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.kvstore.sqlite import SqliteKVStoreImpl from llama_stack.core.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl @pytest.fixture(scope="function") diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index c876f2041..8bfc1f03c 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -18,7 +18,7 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.core.storage.kvstore import register_kvstore_backends @pytest.fixture diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 78f0d7cfd..256df6baf 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -17,6 +17,7 @@ from openai.types.chat.chat_completion_chunk import ( from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) @@ -24,7 +25,6 
@@ from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, _OpenAIResponseObjectWithInputAndMessages, ) -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api.agents import Order from llama_stack_api.inference import ( OpenAIAssistantMessageParam, diff --git a/tests/unit/providers/batches/conftest.py b/tests/unit/providers/batches/conftest.py index d161bf976..8ecfa99fb 100644 --- a/tests/unit/providers/batches/conftest.py +++ b/tests/unit/providers/batches/conftest.py @@ -13,9 +13,9 @@ from unittest.mock import AsyncMock import pytest from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.core.storage.kvstore import kvstore_impl, register_kvstore_backends from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig -from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture diff --git a/tests/unit/providers/files/conftest.py b/tests/unit/providers/files/conftest.py index c64ecc3a3..f8959b5b7 100644 --- a/tests/unit/providers/files/conftest.py +++ b/tests/unit/providers/files/conftest.py @@ -9,8 +9,8 @@ import pytest from moto import mock_aws from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py index e113611bd..49b33fd7b 100644 --- a/tests/unit/providers/files/test_s3_files_auth.py +++ b/tests/unit/providers/files/test_s3_files_auth.py @@ -18,11 +18,11 @@ async def test_listing_hides_other_users_file(s3_provider, sample_text_file): user_a = User("user-a", {"roles": ["team-a"]}) user_b = User("user-b", {"roles": ["team-b"]}) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_a uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_b listed = await s3_provider.openai_list_files() assert all(f.id != uploaded.id for f in listed.data) @@ -41,11 +41,11 @@ async def test_cannot_access_other_user_file(s3_provider, sample_text_file, op): user_a = User("user-a", {"roles": ["team-a"]}) user_b = User("user-b", {"roles": ["team-b"]}) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_a uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS) - with 
patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_b with pytest.raises(ResourceNotFoundError): await op(s3_provider, uploaded.id) @@ -56,11 +56,11 @@ async def test_shared_role_allows_listing(s3_provider, sample_text_file): user_a = User("user-a", {"roles": ["shared-role"]}) user_b = User("user-b", {"roles": ["shared-role"]}) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_a uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_b listed = await s3_provider.openai_list_files() assert any(f.id == uploaded.id for f in listed.data) @@ -79,10 +79,10 @@ async def test_shared_role_allows_access(s3_provider, sample_text_file, op): user_x = User("user-x", {"roles": ["shared-role"]}) user_y = User("user-y", {"roles": ["shared-role"]}) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_x uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS) - with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: + with patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user: mock_get_user.return_value = user_y await op(s3_provider, uploaded.id) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 6408e25ab..b4ea77c0a 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -11,13 +11,13 @@ import numpy as np import pytest from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.core.storage.kvstore import register_kvstore_backends from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter -from llama_stack.providers.utils.kvstore import register_kvstore_backends from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore EMBEDDING_DIMENSION = 768 @@ -279,7 +279,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd ) as mock_check_version: mock_check_version.return_value = "0.5.1" - with 
patch("llama_stack.providers.utils.kvstore.kvstore_impl") as mock_kvstore_impl: + with patch("llama_stack.core.storage.kvstore.kvstore_impl") as mock_kvstore_impl: mock_kvstore = AsyncMock() mock_kvstore_impl.return_value = mock_kvstore diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 1b5032782..2b32de833 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -9,12 +9,12 @@ import pytest from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.core.storage.kvstore import kvstore_impl, register_kvstore_backends from llama_stack.core.store.registry import ( KEY_FORMAT, CachedDiskDistributionRegistry, DiskDistributionRegistry, ) -from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends from llama_stack_api import Model, VectorStore diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py index 0939414dd..cd8c38eed 100644 --- a/tests/unit/server/test_quota.py +++ b/tests/unit/server/test_quota.py @@ -15,7 +15,7 @@ from starlette.middleware.base import BaseHTTPMiddleware from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod from llama_stack.core.server.quota import QuotaMiddleware from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.core.storage.kvstore import register_kvstore_backends @pytest.fixture diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 8f8a61ea7..a1b03f630 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -24,8 +24,8 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.utils.kvstore import register_kvstore_backends -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack.core.storage.kvstore import register_kvstore_backends +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index bdcc529ce..22d4ec1e5 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -9,8 +9,8 @@ import time import pytest from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, diff --git a/tests/unit/utils/kvstore/test_sqlite_memory.py b/tests/unit/utils/kvstore/test_sqlite_memory.py index a31377306..1aaf57b44 100644 --- a/tests/unit/utils/kvstore/test_sqlite_memory.py +++ b/tests/unit/utils/kvstore/test_sqlite_memory.py @@ -5,8 +5,8 @@ # the root directory of this source tree. 
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.sqlite.sqlite import SqliteKVStoreImpl +from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.kvstore.sqlite.sqlite import SqliteKVStoreImpl async def test_memory_kvstore_persistence_behavior(): diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 8c108d9c1..a71fb39f6 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -11,8 +11,8 @@ from uuid import uuid4 import pytest from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends from llama_stack.providers.utils.responses.responses_store import ResponsesStore -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py index d7ba0dc89..421e3b69d 100644 --- a/tests/unit/utils/sqlstore/test_sqlstore.py +++ b/tests/unit/utils/sqlstore/test_sqlstore.py @@ -9,9 +9,9 @@ from tempfile import TemporaryDirectory import pytest -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl +from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType async def test_sqlite_sqlstore(): diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py index d85e784a9..e9a6b511b 100644 --- a/tests/unit/utils/test_authorized_sqlstore.py +++ b/tests/unit/utils/test_authorized_sqlstore.py @@ -10,13 +10,13 @@ from unittest.mock import patch from llama_stack.core.access_control.access_control import default_policy, is_action_allowed from llama_stack.core.access_control.datatypes import Action from llama_stack.core.datatypes import User -from llama_stack.providers.utils.sqlstore.api import ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore, SqlRecord -from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore, SqlRecord +from llama_stack.core.storage.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl +from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack_api.internal.sqlstore import ColumnType -@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") +@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_fetch_with_where_sql_access_control(mock_get_authenticated_user): """Test that fetch_all works correctly with where_sql for access control""" with TemporaryDirectory() as tmp_dir: @@ -78,7 +78,7 @@ async def 
test_authorized_fetch_with_where_sql_access_control(mock_get_authentic
     assert row["title"] == "User Document"
 
 
-@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
+@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_sql_policy_consistency(mock_get_authenticated_user):
     """Test that SQL WHERE clause logic exactly matches is_action_allowed policy logic"""
     with TemporaryDirectory() as tmp_dir:
@@ -164,7 +164,7 @@ async def test_sql_policy_consistency(mock_get_authenticated_user):
     )
 
 
-@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
+@patch("llama_stack.core.storage.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_authorized_store_user_attribute_capture(mock_get_authenticated_user):
     """Test that user attributes are properly captured during insert"""
     with TemporaryDirectory() as tmp_dir:

From 91f1b352b4ca2c6d9a4624663bffbd2a8d98fb69 Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Tue, 18 Nov 2025 18:22:26 -0500
Subject: [PATCH 45/62] chore: add storage sane defaults (#4182)

# What does this PR do?
Since `StackRunConfig` requires certain parts of `StorageConfig`, it makes sense to template in some defaults that will "just work" for most use cases. Specifically, this introduces `ServerStoresConfig` defaults for the inference, metadata, conversations, and prompts stores; we already funnel in defaults for these sections ad hoc throughout the codebase. Additionally, this sets some `backends` defaults for `StorageConfig`, which alleviates some weirdness with `--providers` for run/list-deps and simplifies upcoming work to better align the list-deps/run datatypes.

---------

Signed-off-by: Charlie Doern
---
 src/llama_stack/core/storage/datatypes.py | 27 ++++++++++++++++---
 .../unit/conversations/test_conversations.py | 6 +++++
 tests/unit/core/test_stack_validation.py | 23 +++++++++++++---
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/src/llama_stack/core/storage/datatypes.py b/src/llama_stack/core/storage/datatypes.py
index 4b17b9ea9..527c1b828 100644
--- a/src/llama_stack/core/storage/datatypes.py
+++ b/src/llama_stack/core/storage/datatypes.py
@@ -12,6 +12,8 @@ from typing import Annotated, Literal
 
 from pydantic import BaseModel, Field, field_validator
 
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+
 
 class StorageBackendType(StrEnum):
     KV_REDIS = "kv_redis"
@@ -256,15 +258,24 @@ class ResponsesStoreReference(InferenceStoreReference):
 
 class ServerStoresConfig(BaseModel):
     metadata: KVStoreReference | None = Field(
-        default=None,
+        default=KVStoreReference(
+            backend="kv_default",
+            namespace="registry",
+        ),
         description="Metadata store configuration (uses KV backend)",
     )
     inference: InferenceStoreReference | None = Field(
-        default=None,
+        default=InferenceStoreReference(
+            backend="sql_default",
+            table_name="inference_store",
+        ),
         description="Inference store configuration (uses SQL backend)",
     )
     conversations: SqlStoreReference | None = Field(
-        default=None,
+        default=SqlStoreReference(
+            backend="sql_default",
+            table_name="openai_conversations",
+        ),
         description="Conversations store configuration (uses SQL backend)",
     )
     responses: ResponsesStoreReference | None = Field(
@@ -272,13 +283,21 @@ class ServerStoresConfig(BaseModel):
         description="Responses store configuration (uses SQL backend)",
     )
     prompts: KVStoreReference | None = Field(
-        default=None,
+        default=KVStoreReference(backend="kv_default", namespace="prompts"),
description="Prompts store configuration (uses KV backend)", ) class StorageConfig(BaseModel): backends: dict[str, StorageBackendConfig] = Field( + default={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db", + ), + }, description="Named backend configurations (e.g., 'default', 'cache')", ) stores: ServerStoresConfig = Field( diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index e8286576b..3f9df5fc0 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -38,6 +38,9 @@ async def service(): }, stores=ServerStoresConfig( conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + metadata=None, + inference=None, + prompts=None, ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) @@ -142,6 +145,9 @@ async def test_policy_configuration(): }, stores=ServerStoresConfig( conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + metadata=None, + inference=None, + prompts=None, ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index 462a25c8b..5f75bc522 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -10,8 +10,9 @@ from unittest.mock import AsyncMock import pytest -from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig +from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config +from llama_stack.core.storage.datatypes import ServerStoresConfig, StorageConfig from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield @@ -21,7 +22,15 @@ class TestVectorStoresValidation: run_config = StackRunConfig( image_name="test", providers={}, - storage=StorageConfig(backends={}, stores={}), + storage=StorageConfig( + backends={}, + stores=ServerStoresConfig( + metadata=None, + inference=None, + conversations=None, + prompts=None, + ), + ), vector_stores=VectorStoresConfig( default_provider_id="faiss", default_embedding_model=QualifiedModel( @@ -41,7 +50,15 @@ class TestVectorStoresValidation: run_config = StackRunConfig( image_name="test", providers={}, - storage=StorageConfig(backends={}, stores={}), + storage=StorageConfig( + backends={}, + stores=ServerStoresConfig( + metadata=None, + inference=None, + conversations=None, + prompts=None, + ), + ), vector_stores=VectorStoresConfig( default_provider_id="faiss", default_embedding_model=QualifiedModel( From d5cd0eea14a3e061bc9a6e48bd606190ebaf907b Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 19 Nov 2025 11:44:28 -0500 Subject: [PATCH 46/62] feat!: standardize base_url for inference (#4177) # What does this PR do? Completes #3732 by removing runtime URL transformations and requiring users to provide full URLs in configuration. All providers now use 'base_url' consistently and respect the exact URL provided without appending paths like /v1 or /openai/v1 at runtime. 
BREAKING CHANGE: Users must update configs to include full URL paths (e.g., http://localhost:11434/v1 instead of http://localhost:11434). Closes #3732 ## Test Plan Existing tests should pass even with the URL changes, due to default URLs being altered. Add unit test to enforce URL standardization across remote inference providers (verifies all use 'base_url' field with HttpUrl | None type) Signed-off-by: Charlie Doern --- .../docs/providers/inference/remote_azure.mdx | 4 +- .../providers/inference/remote_cerebras.mdx | 4 +- .../providers/inference/remote_databricks.mdx | 4 +- .../providers/inference/remote_fireworks.mdx | 4 +- docs/docs/providers/inference/remote_groq.mdx | 4 +- .../inference/remote_llama-openai-compat.mdx | 4 +- .../providers/inference/remote_nvidia.mdx | 6 +- .../providers/inference/remote_ollama.mdx | 4 +- .../providers/inference/remote_openai.mdx | 2 +- .../inference/remote_passthrough.mdx | 4 +- .../providers/inference/remote_runpod.mdx | 4 +- .../providers/inference/remote_sambanova.mdx | 4 +- docs/docs/providers/inference/remote_tgi.mdx | 4 +- .../providers/inference/remote_together.mdx | 4 +- docs/docs/providers/inference/remote_vllm.mdx | 4 +- .../providers/inference/remote_watsonx.mdx | 4 +- scripts/docker.sh | 4 +- scripts/install.sh | 2 +- .../ci-tests/run-with-postgres-store.yaml | 21 ++++--- .../distributions/ci-tests/run.yaml | 21 ++++--- .../distributions/nvidia/run-with-safety.yaml | 3 +- src/llama_stack/distributions/nvidia/run.yaml | 3 +- .../distributions/open-benchmark/run.yaml | 4 +- .../distributions/postgres-demo/run.yaml | 2 +- .../starter-gpu/run-with-postgres-store.yaml | 21 ++++--- .../distributions/starter-gpu/run.yaml | 21 ++++--- .../starter/run-with-postgres-store.yaml | 21 ++++--- .../distributions/starter/run.yaml | 21 ++++--- .../distributions/watsonx/run.yaml | 2 +- .../providers/remote/inference/azure/azure.py | 4 +- .../remote/inference/azure/config.py | 9 +-- .../remote/inference/cerebras/cerebras.py | 4 +- .../remote/inference/cerebras/config.py | 8 +-- .../remote/inference/databricks/config.py | 10 ++-- .../remote/inference/databricks/databricks.py | 10 +++- .../remote/inference/fireworks/config.py | 8 +-- .../remote/inference/fireworks/fireworks.py | 2 +- .../providers/remote/inference/groq/config.py | 8 +-- .../providers/remote/inference/groq/groq.py | 2 +- .../inference/llama_openai_compat/config.py | 8 +-- .../inference/llama_openai_compat/llama.py | 2 +- .../remote/inference/nvidia/config.py | 16 ++---- .../remote/inference/nvidia/nvidia.py | 4 +- .../remote/inference/nvidia/utils.py | 2 +- .../remote/inference/ollama/config.py | 12 ++-- .../remote/inference/ollama/ollama.py | 12 +++- .../remote/inference/openai/config.py | 6 +- .../remote/inference/openai/openai.py | 2 +- .../remote/inference/passthrough/config.py | 8 +-- .../inference/passthrough/passthrough.py | 4 +- .../remote/inference/runpod/config.py | 6 +- .../remote/inference/runpod/runpod.py | 2 +- .../remote/inference/sambanova/config.py | 8 +-- .../remote/inference/sambanova/sambanova.py | 2 +- .../providers/remote/inference/tgi/config.py | 11 ++-- .../providers/remote/inference/tgi/tgi.py | 20 ++++--- .../remote/inference/together/config.py | 8 +-- .../remote/inference/together/together.py | 3 +- .../providers/remote/inference/vllm/config.py | 8 +-- .../providers/remote/inference/vllm/vllm.py | 6 +- .../remote/inference/watsonx/config.py | 6 +- .../remote/inference/watsonx/watsonx.py | 4 +- tests/integration/suites.py | 6 +- .../test_inference_client_caching.py | 4 
+- .../providers/inference/test_remote_vllm.py | 6 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/test_configs.py | 56 ++++++++++++++++++- 67 files changed, 282 insertions(+), 227 deletions(-) diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index fd22b157e..0382b42d7 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | +| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) | @@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview ```yaml api_key: ${env.AZURE_API_KEY:=} -api_base: ${env.AZURE_API_BASE:=} +base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} ``` diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index 1fb9530bb..9fd390a29 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API | ## Sample Configuration ```yaml -base_url: https://api.cerebras.ai +base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index 7a926baf4..d50c52958 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `SecretStr \| None` | No | | The Databricks API token | -| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | +| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) | ## Sample Configuration ```yaml -url: ${env.DATABRICKS_HOST:=} +base_url: ${env.DATABRICKS_HOST:=} api_token: ${env.DATABRICKS_TOKEN:=} ``` diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index 7db74efc4..a67403a9b 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | ## Sample Configuration ```yaml -url: https://api.fireworks.ai/inference/v1 +base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index 3ebd6f907..17acd3140 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server | +| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server | ## Sample Configuration ```yaml -url: https://api.groq.com +base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index f67f40909..69e90b2ac 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | +| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | ## Sample Configuration ```yaml -openai_compat_api_base: https://api.llama.com/compat/v1/ +base_url: https://api.llama.com/compat/v1/ api_key: ${env.LLAMA_API_KEY} ``` diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index 6646d8b00..a890bc57f 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | +| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests | -| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. | ## Sample Configuration ```yaml -url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} +base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} -append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} ``` diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx index 497bfed52..f9be84add 100644 --- a/docs/docs/providers/inference/remote_ollama.mdx +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `url` | `str` | No | http://localhost:11434 | | +| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | | ## Sample Configuration ```yaml -url: ${env.OLLAMA_URL:=http://localhost:11434} +base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} ``` diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index 4931118fd..3ac3a21ad 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API | +| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 009961d49..325ecc352 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | | The URL for the passthrough endpoint | +| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint | ## Sample Configuration ```yaml -url: ${env.PASSTHROUGH_URL} +base_url: ${env.PASSTHROUGH_URL} api_key: ${env.PASSTHROUGH_API_KEY} ``` diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index 3b67e157d..6cdcdd3b5 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `SecretStr \| None` | No | | The API token | -| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | +| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint | ## Sample Configuration ```yaml -url: ${env.RUNPOD_URL:=} +base_url: ${env.RUNPOD_URL:=} api_token: ${env.RUNPOD_API_TOKEN} ``` diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index 6f4c5d7f6..bbefdb0f0 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | ## Sample Configuration ```yaml -url: https://api.sambanova.ai/v1 +base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx index cd5ea7661..3790acdd4 100644 --- a/docs/docs/providers/inference/remote_tgi.mdx +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `url` | `str` | No | | The URL for the TGI serving endpoint | +| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) | ## Sample Configuration ```yaml -url: ${env.TGI_URL:=} +base_url: ${env.TGI_URL:=} ``` diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index 43192cc9e..dc025b5ac 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server | ## Sample Configuration ```yaml -url: https://api.together.xyz/v1 +base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index 81620dbca..a52c24adb 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers. | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `SecretStr \| None` | No | | The API token | -| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | +| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint | | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | ## Sample Configuration ```yaml -url: ${env.VLLM_URL:=} +base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 3a1dba3b4..47d543e3a 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `project_id` | `str \| None` | No | | The watsonx.ai project ID | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration ```yaml -url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} +base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} ``` diff --git a/scripts/docker.sh b/scripts/docker.sh index b56df8c03..3b2db5ca7 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -287,9 +287,9 @@ start_container() { # On macOS/Windows, use host.docker.internal to reach host from container # On Linux with --network host, use localhost if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then - OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}" + OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}" else - OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}" + OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}" fi DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" diff --git a/scripts/install.sh b/scripts/install.sh index 5e4939767..7fe1d3243 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ -p "${PORT}:${PORT}" \ "${server_env_opts[@]}" \ - -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ + -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \ "${SERVER_IMAGE}" --port "${PORT}") log "🦙 Starting Llama Stack..." 
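The distribution files below lean on two substitution forms that recur throughout: `${env.VAR:=default}` falls back to the default when VAR is unset, while `${env.VAR:+value}` expands to the value only when VAR is set (this appears to mirror shell parameter expansion, and is used here to register a provider conditionally). A minimal sketch combining both, assuming the Ollama defaults shown in these diffs:

```yaml
providers:
  inference:
  # the provider id is only populated when OLLAMA_URL is set
  - provider_id: ${env.OLLAMA_URL:+ollama}
    provider_type: remote::ollama
    config:
      # full URL including /v1; no path is appended at runtime
      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
```
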
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 5384b58fe..d942c23a4 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -17,32 +17,32 @@ providers: - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 1118d2ad1..8b1cd2bb2 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -17,32 +17,32 @@ providers: - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: 
${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml index 1d57ad17a..d2c7dd090 100644 --- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -16,9 +16,8 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml index 8c50b8bfb..c267587c7 100644 --- a/src/llama_stack/distributions/nvidia/run.yaml +++ b/src/llama_stack/distributions/nvidia/run.yaml @@ -16,9 +16,8 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml index 912e48dd3..7ebc58841 100644 --- a/src/llama_stack/distributions/open-benchmark/run.yaml +++ b/src/llama_stack/distributions/open-benchmark/run.yaml @@ -27,12 +27,12 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} vector_io: - 
provider_id: sqlite-vec diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml index dd1c2bc7f..049f519cd 100644 --- a/src/llama_stack/distributions/postgres-demo/run.yaml +++ b/src/llama_stack/distributions/postgres-demo/run.yaml @@ -11,7 +11,7 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index e29ada6f4..75cc9d188 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -17,32 +17,32 @@ providers: - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 7149b8659..09c7be5a1 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -17,32 +17,32 @@ providers: - provider_id: 
${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 437674bf9..f59c809d2 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -17,32 +17,32 @@ providers: - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - 
provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index 0ce392810..435bb22a7 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -17,32 +17,32 @@ providers: - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: - base_url: https://api.cerebras.ai + base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} - provider_id: ${env.OLLAMA_URL:+ollama} provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:=http://localhost:11434} + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: - url: ${env.VLLM_URL:=} + base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL:=} + base_url: ${env.TGI_URL:=} - provider_id: fireworks provider_type: remote::fireworks config: - url: https://api.fireworks.ai/inference/v1 + base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - provider_id: together provider_type: remote::together config: - url: https://api.together.xyz/v1 + base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock @@ -52,9 +52,8 @@ providers: - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: openai provider_type: remote::openai config: @@ -76,18 +75,18 @@ providers: - provider_id: groq provider_type: remote::groq config: - url: https://api.groq.com + base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} - provider_id: sambanova provider_type: remote::sambanova config: - url: https://api.sambanova.ai/v1 + base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: 
${env.AZURE_API_KEY:+azure} provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} + base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - provider_id: sentence-transformers diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml index 8456115d2..f8c489fe3 100644 --- a/src/llama_stack/distributions/watsonx/run.yaml +++ b/src/llama_stack/distributions/watsonx/run.yaml @@ -15,7 +15,7 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} vector_io: diff --git a/src/llama_stack/providers/remote/inference/azure/azure.py b/src/llama_stack/providers/remote/inference/azure/azure.py index 134d01b15..c977d75d5 100644 --- a/src/llama_stack/providers/remote/inference/azure/azure.py +++ b/src/llama_stack/providers/remote/inference/azure/azure.py @@ -4,8 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from urllib.parse import urljoin - from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import AzureConfig @@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin): Returns the Azure API base URL from the configuration. """ - return urljoin(str(self.config.api_base), "/openai/v1") + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py index b801b91b2..f6407a183 100644 --- a/src/llama_stack/providers/remote/inference/azure/config.py +++ b/src/llama_stack/providers/remote/inference/azure/config.py @@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel): @json_schema_type class AzureConfig(RemoteInferenceProviderConfig): - api_base: HttpUrl = Field( - description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", + base_url: HttpUrl | None = Field( + default=None, + description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)", ) api_version: str | None = Field( default_factory=lambda: os.getenv("AZURE_API_VERSION"), @@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig): def sample_run_config( cls, api_key: str = "${env.AZURE_API_KEY:=}", - api_base: str = "${env.AZURE_API_BASE:=}", + base_url: str = "${env.AZURE_API_BASE:=}", api_version: str = "${env.AZURE_API_VERSION:=}", api_type: str = "${env.AZURE_API_TYPE:=}", **kwargs, ) -> dict[str, Any]: return { "api_key": api_key, - "api_base": api_base, + "base_url": base_url, "api_version": api_version, "api_type": api_type, } diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py index 680431e22..23c27df1e 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -4,8 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from urllib.parse import urljoin - from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, @@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "cerebras_api_key" def get_base_url(self) -> str: - return urljoin(self.config.base_url, "v1") + return str(self.config.base_url) async def openai_embeddings( self, diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py index db357fd1c..ea88abbea 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/config.py +++ b/src/llama_stack/providers/remote/inference/cerebras/config.py @@ -7,12 +7,12 @@ import os from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type -DEFAULT_BASE_URL = "https://api.cerebras.ai" +DEFAULT_BASE_URL = "https://api.cerebras.ai/v1" class CerebrasProviderDataValidator(BaseModel): @@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel): @json_schema_type class CerebrasImplConfig(RemoteInferenceProviderConfig): - base_url: str = Field( - default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), + base_url: HttpUrl | None = Field( + default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)), description="Base URL for the Cerebras API", ) diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py index bd409fa13..44cb862f9 100644 --- a/src/llama_stack/providers/remote/inference/databricks/config.py +++ b/src/llama_stack/providers/remote/inference/databricks/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel): @json_schema_type class DatabricksImplConfig(RemoteInferenceProviderConfig): - url: str | None = Field( + base_url: HttpUrl | None = Field( default=None, - description="The URL for the Databricks model serving endpoint", + description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)", ) auth_credential: SecretStr | None = Field( default=None, @@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config( cls, - url: str = "${env.DATABRICKS_HOST:=}", + base_url: str = "${env.DATABRICKS_HOST:=}", api_token: str = "${env.DATABRICKS_TOKEN:=}", **kwargs: Any, ) -> dict[str, Any]: return { - "url": url, + "base_url": base_url, "api_token": api_token, } diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py index c07d97b67..f2f8832f6 100644 --- a/src/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py @@ -29,15 +29,21 @@ class DatabricksInferenceAdapter(OpenAIMixin): } def get_base_url(self) -> str: - return f"{self.config.url}/serving-endpoints" + return str(self.config.base_url) async def list_provider_model_ids(self) -> 
Iterable[str]: # Filter out None values from endpoint names api_token = self._get_api_key_from_config_or_provider_data() + # WorkspaceClient expects base host without /serving-endpoints suffix + base_url_str = str(self.config.base_url) + if base_url_str.endswith("/serving-endpoints"): + host = base_url_str[:-18] # Remove '/serving-endpoints' + else: + host = base_url_str return [ endpoint.name # type: ignore[misc] for endpoint in WorkspaceClient( - host=self.config.url, token=api_token + host=host, token=api_token ).serving_endpoints.list() # TODO: this is not async ] diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py index e36c76054..c59b5f270 100644 --- a/src/llama_stack/providers/remote/inference/fireworks/config.py +++ b/src/llama_stack/providers/remote/inference/fireworks/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import Field +from pydantic import Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type @json_schema_type class FireworksImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.fireworks.ai/inference/v1", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.fireworks.ai/inference/v1"), description="The URL for the Fireworks server", ) @classmethod def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { - "url": "https://api.fireworks.ai/inference/v1", + "base_url": "https://api.fireworks.ai/inference/v1", "api_key": api_key, } diff --git a/src/llama_stack/providers/remote/inference/fireworks/fireworks.py b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py index 7e2b73546..61ea0b1f6 100644 --- a/src/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "fireworks_api_key" def get_base_url(self) -> str: - return "https://api.fireworks.ai/inference/v1" + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py index cca53a4e8..e5c29c271 100644 --- a/src/llama_stack/providers/remote/inference/groq/config.py +++ b/src/llama_stack/providers/remote/inference/groq/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel): @json_schema_type class GroqConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.groq.com", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.groq.com/openai/v1"), description="The URL for the Groq AI server", ) @classmethod def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: return { - "url": "https://api.groq.com", + "base_url": "https://api.groq.com/openai/v1", "api_key": api_key, } diff --git a/src/llama_stack/providers/remote/inference/groq/groq.py b/src/llama_stack/providers/remote/inference/groq/groq.py index 3a4f2626d..f99de91ca 
100644 --- a/src/llama_stack/providers/remote/inference/groq/groq.py +++ b/src/llama_stack/providers/remote/inference/groq/groq.py @@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "groq_api_key" def get_base_url(self) -> str: - return f"{self.config.url}/openai/v1" + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py index ded210d89..a0f80d969 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel): @json_schema_type class LlamaCompatConfig(RemoteInferenceProviderConfig): - openai_compat_api_base: str = Field( - default="https://api.llama.com/compat/v1/", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.llama.com/compat/v1/"), description="The URL for the Llama API server", ) @classmethod def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: return { - "openai_compat_api_base": "https://api.llama.com/compat/v1/", + "base_url": "https://api.llama.com/compat/v1/", "api_key": api_key, } diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index a5f67ecd1..f29aebf36 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin): :return: The Llama API base URL """ - return self.config.openai_compat_api_base + return str(self.config.base_url) async def openai_completion( self, diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py index e5b0c6b73..e1e9a0ea9 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/config.py +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig): URL of your running NVIDIA NIM and do not need to set the api_key. """ - url: str = Field( - default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"), + base_url: HttpUrl | None = Field( + default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"), description="A base url for accessing the NVIDIA NIM", ) timeout: int = Field( default=60, description="Timeout for the HTTP requests", ) - append_api_version: bool = Field( - default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false", - description="When set to false, the API version will not be appended to the base_url. 
By default, it is true.", - ) rerank_model_to_url: dict[str, str] = Field( default_factory=lambda: { "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", @@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config( cls, - url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}", api_key: str = "${env.NVIDIA_API_KEY:=}", - append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}", **kwargs, ) -> dict[str, Any]: return { - "url": url, + "base_url": base_url, "api_key": api_key, - "append_api_version": append_api_version, } diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py index 17f8775bf..5d0d52d6a 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin): } async def initialize(self) -> None: - logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...") + logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...") if _is_nvidia_hosted(self.config): if not self.config.auth_credential: @@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin): :return: The NVIDIA API base URL """ - return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url + return str(self.config.base_url) async def list_provider_model_ids(self) -> Iterable[str]: """ diff --git a/src/llama_stack/providers/remote/inference/nvidia/utils.py b/src/llama_stack/providers/remote/inference/nvidia/utils.py index 46ee939d9..c138d1fc5 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/src/llama_stack/providers/remote/inference/nvidia/utils.py @@ -8,4 +8,4 @@ from . 
import NVIDIAConfig def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: - return "integrate.api.nvidia.com" in config.url + return "integrate.api.nvidia.com" in str(config.base_url) diff --git a/src/llama_stack/providers/remote/inference/ollama/config.py b/src/llama_stack/providers/remote/inference/ollama/config.py index 416b847a0..60dd34fa8 100644 --- a/src/llama_stack/providers/remote/inference/ollama/config.py +++ b/src/llama_stack/providers/remote/inference/ollama/config.py @@ -6,20 +6,22 @@ from typing import Any -from pydantic import Field, SecretStr +from pydantic import Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -DEFAULT_OLLAMA_URL = "http://localhost:11434" +DEFAULT_OLLAMA_URL = "http://localhost:11434/v1" class OllamaImplConfig(RemoteInferenceProviderConfig): auth_credential: SecretStr | None = Field(default=None, exclude=True) - url: str = DEFAULT_OLLAMA_URL + base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL)) @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: + def sample_run_config( + cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs + ) -> dict[str, Any]: return { - "url": url, + "base_url": base_url, } diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py index d1bf85361..e8b872384 100644 --- a/src/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py @@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin): # ollama client attaches itself to the current event loop (sadly?) loop = asyncio.get_running_loop() if loop not in self._clients: - self._clients[loop] = AsyncOllamaClient(host=self.config.url) + # Ollama client expects base URL without /v1 suffix + base_url_str = str(self.config.base_url) + if base_url_str.endswith("/v1"): + host = base_url_str[:-3] + else: + host = base_url_str + self._clients[loop] = AsyncOllamaClient(host=host) return self._clients[loop] def get_api_key(self): return "NO KEY REQUIRED" def get_base_url(self): - return self.config.url.rstrip("/") + "/v1" + return str(self.config.base_url) async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.config.url}`...") + logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...") r = await self.health() if r["status"] == HealthStatus.ERROR: logger.warning( diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py index ab28e571f..2057cd0d6 100644 --- a/src/llama_stack/providers/remote/inference/openai/config.py +++ b/src/llama_stack/providers/remote/inference/openai/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel): @json_schema_type class OpenAIConfig(RemoteInferenceProviderConfig): - base_url: str = Field( - default="https://api.openai.com/v1", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.openai.com/v1"), description="Base URL for OpenAI API", ) diff --git a/src/llama_stack/providers/remote/inference/openai/openai.py 
b/src/llama_stack/providers/remote/inference/openai/openai.py index 52bc48f1a..2d465546a 100644 --- a/src/llama_stack/providers/remote/inference/openai/openai.py +++ b/src/llama_stack/providers/remote/inference/openai/openai.py @@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin): Returns the OpenAI API base URL from the configuration. """ - return self.config.base_url + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index 54508b6fb..f45806e79 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import Field +from pydantic import Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type @json_schema_type class PassthroughImplConfig(RemoteInferenceProviderConfig): - url: str = Field( + base_url: HttpUrl | None = Field( default=None, description="The URL for the passthrough endpoint", ) @classmethod def sample_run_config( - cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs + cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs ) -> dict[str, Any]: return { - "url": url, + "base_url": base_url, "api_key": api_key, } diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 75eedf026..b0e2e74ad 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference): def _get_passthrough_url(self) -> str: """Get the passthrough URL from config or provider data.""" - if self.config.url is not None: - return self.config.url + if self.config.base_url is not None: + return str(self.config.base_url) provider_data = self.get_request_provider_data() if provider_data is None: diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py index 2ee56ca94..8d06f5263 100644 --- a/src/llama_stack/providers/remote/inference/runpod/config.py +++ b/src/llama_stack/providers/remote/inference/runpod/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel): @json_schema_type class RunpodImplConfig(RemoteInferenceProviderConfig): - url: str | None = Field( + base_url: HttpUrl | None = Field( default=None, description="The URL for the Runpod model serving endpoint", ) @@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:=}", + "base_url": "${env.RUNPOD_URL:=}", "api_token": "${env.RUNPOD_API_TOKEN}", } diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py 
b/src/llama_stack/providers/remote/inference/runpod/runpod.py index 9c770cc24..04ad12851 100644 --- a/src/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py @@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin): def get_base_url(self) -> str: """Get base URL for OpenAI client.""" - return self.config.url + return str(self.config.base_url) async def openai_chat_completion( self, diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py index 93679ba99..79cda75a0 100644 --- a/src/llama_stack/providers/remote/inference/sambanova/config.py +++ b/src/llama_stack/providers/remote/inference/sambanova/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel): @json_schema_type class SambaNovaImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.sambanova.ai/v1", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.sambanova.ai/v1"), description="The URL for the SambaNova AI server", ) @classmethod def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { - "url": "https://api.sambanova.ai/v1", + "base_url": "https://api.sambanova.ai/v1", "api_key": api_key, } diff --git a/src/llama_stack/providers/remote/inference/sambanova/sambanova.py b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py index daa4b1670..cb01e3a90 100644 --- a/src/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin): :return: The SambaNova base URL """ - return self.config.url + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py index 74edc8523..44cb4b812 100644 --- a/src/llama_stack/providers/remote/inference/tgi/config.py +++ b/src/llama_stack/providers/remote/inference/tgi/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type class TGIImplConfig(RemoteInferenceProviderConfig): auth_credential: SecretStr | None = Field(default=None, exclude=True) - url: str = Field( - description="The URL for the TGI serving endpoint", + base_url: HttpUrl | None = Field( + default=None, + description="The URL for the TGI serving endpoint (should include /v1 path)", ) @classmethod def sample_run_config( cls, - url: str = "${env.TGI_URL:=}", + base_url: str = "${env.TGI_URL:=}", **kwargs, ): return { - "url": url, + "base_url": base_url, } diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index dd47ccc62..5dc8c33f7 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -8,7 +8,7 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi -from pydantic import SecretStr +from pydantic import HttpUrl, SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi") class _HfAdapter(OpenAIMixin): - url: str + base_url: HttpUrl api_key: SecretStr hf_client: AsyncInferenceClient @@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin): return "NO KEY REQUIRED" def get_base_url(self): - return self.url + return self.base_url async def list_provider_model_ids(self) -> Iterable[str]: return [self.model_id] @@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin): class TGIAdapter(_HfAdapter): async def initialize(self, config: TGIImplConfig) -> None: - if not config.url: + if not config.base_url: raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") - log.info(f"Initializing TGI client with url={config.url}") - self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference") + log.info(f"Initializing TGI client with url={config.base_url}") + # Extract base URL without /v1 for HF client initialization + base_url_str = str(config.base_url).rstrip("/") + if base_url_str.endswith("/v1"): + base_url_for_client = base_url_str[:-3] + else: + base_url_for_client = base_url_str + self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference") endpoint_info = await self.hf_client.get_endpoint_info() self.max_tokens = endpoint_info["max_total_tokens"] self.model_id = endpoint_info["model_id"] - self.url = f"{config.url.rstrip('/')}/v1" + self.base_url = config.base_url self.api_key = SecretStr("NO_KEY") diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py index c1b3c4a55..16f0686ba 100644 --- a/src/llama_stack/providers/remote/inference/together/config.py +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import Field +from pydantic import Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type @json_schema_type class 
TogetherImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.together.xyz/v1", + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.together.xyz/v1"), description="The URL for the Together AI server", ) @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "https://api.together.xyz/v1", + "base_url": "https://api.together.xyz/v1", "api_key": "${env.TOGETHER_API_KEY:=}", } diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py index cd34aec5e..0826dbcd2 100644 --- a/src/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -9,7 +9,6 @@ from collections.abc import Iterable from typing import Any, cast from together import AsyncTogether # type: ignore[import-untyped] -from together.constants import BASE_URL # type: ignore[import-untyped] from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger @@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): provider_data_api_key_field: str = "together_api_key" def get_base_url(self): - return BASE_URL + return str(self.config.base_url) def _get_client(self) -> AsyncTogether: together_api_key = None diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py index c43533ee4..db6c74431 100644 --- a/src/llama_stack/providers/remote/inference/vllm/config.py +++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,7 +6,7 @@ from pathlib import Path -from pydantic import Field, SecretStr, field_validator +from pydantic import Field, HttpUrl, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type @json_schema_type class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): - url: str | None = Field( + base_url: HttpUrl | None = Field( default=None, description="The URL for the vLLM model serving endpoint", ) @@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config( cls, - url: str = "${env.VLLM_URL:=}", + base_url: str = "${env.VLLM_URL:=}", **kwargs, ): return { - "url": url, + "base_url": base_url, "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", "api_token": "${env.VLLM_API_TOKEN:=fake}", "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 1510e9384..6664ca36b 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin): def get_base_url(self) -> str: """Get the base URL from config.""" - if not self.config.url: + if not self.config.base_url: raise ValueError("No base URL configured") - return self.config.url + return str(self.config.base_url) async def initialize(self) -> None: - if not self.config.url: + if not self.config.base_url: raise ValueError( "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." 
) diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index 914f80820..be2b2c0ab 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack_api import json_schema_type @@ -23,7 +23,7 @@ class WatsonXProviderDataValidator(BaseModel): @json_schema_type class WatsonXConfig(RemoteInferenceProviderConfig): - url: str = Field( + base_url: HttpUrl | None = Field( default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), description="A base url for accessing the watsonx.ai", ) @@ -39,7 +39,7 @@ class WatsonXConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", "api_key": "${env.WATSONX_API_KEY:=}", "project_id": "${env.WATSONX_PROJECT_ID:=}", } diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index aab9e2dca..5684f6c17 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -255,7 +255,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) def get_base_url(self) -> str: - return self.config.url + return str(self.config.base_url) # Copied from OpenAIMixin async def check_model_availability(self, model: str) -> bool: @@ -316,7 +316,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): """ Retrieves foundation model specifications from the watsonx.ai API. """ - url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25" + url = f"{str(self.config.base_url)}/ml/v1/foundation_model_specs?version=2023-10-25" headers = { # Note that there is no authorization header. Listing models does not require authentication. 
"Content-Type": "application/json", diff --git a/tests/integration/suites.py b/tests/integration/suites.py index 7689657b4..10c872705 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -50,7 +50,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama", description="Local Ollama provider with text + safety models", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", "SAFETY_MODEL": "ollama/llama-guard3:1b", }, defaults={ @@ -64,7 +64,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama", description="Local Ollama provider with a vision model", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", }, defaults={ "vision_model": "ollama/llama3.2-vision:11b", @@ -75,7 +75,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama-postgres", description="Server-mode tests with Postgres-backed persistence", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", "SAFETY_MODEL": "ollama/llama-guard3:1b", "POSTGRES_HOST": "127.0.0.1", "POSTGRES_PORT": "5432", diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py index aa3a2c77a..6ddf790af 100644 --- a/tests/unit/providers/inference/test_inference_client_caching.py +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere VLLMInferenceAdapter, "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator", { - "url": "http://fake", + "base_url": "http://fake", }, ), ], @@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the assumption that there is an OpenAI-compatible client object.""" - inference_adapter = adapter_cls(config=config_cls()) + inference_adapter = adapter_cls(config=config_cls(base_url="http://fake")) inference_adapter.__provider_spec__ = MagicMock() inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 958895cc4..0cf8ed306 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -40,7 +40,7 @@ from llama_stack_api import ( @pytest.fixture(scope="function") async def vllm_inference_adapter(): - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") inference_adapter = VLLMInferenceAdapter(config=config) inference_adapter.model_store = AsyncMock() await inference_adapter.initialize() @@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body(): via extra_body to the underlying OpenAI client through the InferenceRouter. """ # Set up the vLLM adapter - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") vllm_adapter = VLLMInferenceAdapter(config=config) vllm_adapter.__provider_id__ = "vllm" await vllm_adapter.initialize() @@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body(): via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion. 
""" # Set up the vLLM adapter - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") vllm_adapter = VLLMInferenceAdapter(config=config) vllm_adapter.__provider_id__ = "vllm" await vllm_adapter.initialize() diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index ee62910b8..4ad9dc766 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -146,7 +146,7 @@ async def test_hosted_model_not_in_endpoint_mapping(): async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( - config=NVIDIAConfig(url="http://localhost:8000", api_key=None), + config=NVIDIAConfig(base_url="http://localhost:8000", api_key=None), rerank_endpoints={"test-model": "https://model.endpoint/rerank"}, # This should be ignored for self-hosted. ) mock_session = MockSession(MockResponse()) diff --git a/tests/unit/providers/test_configs.py b/tests/unit/providers/test_configs.py index 867cfffbc..b4ba78394 100644 --- a/tests/unit/providers/test_configs.py +++ b/tests/unit/providers/test_configs.py @@ -4,8 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import get_args, get_origin + import pytest -from pydantic import BaseModel +from pydantic import BaseModel, HttpUrl from llama_stack.core.distribution import get_provider_registry, providable_apis from llama_stack.core.utils.dynamic import instantiate_class_type @@ -41,3 +43,55 @@ class TestProviderConfigurations: sample_config = config_type.sample_run_config(__distro_dir__="foobarbaz") assert isinstance(sample_config, dict), f"{config_class_name}.sample_run_config() did not return a dict" + + def test_remote_inference_url_standardization(self): + """Verify all remote inference providers use standardized base_url configuration.""" + provider_registry = get_provider_registry() + inference_providers = provider_registry.get("inference", {}) + + # Filter for remote providers only + remote_providers = {k: v for k, v in inference_providers.items() if k.startswith("remote::")} + + failures = [] + for provider_type, provider_spec in remote_providers.items(): + try: + config_class_name = provider_spec.config_class + config_type = instantiate_class_type(config_class_name) + + # Check that config has base_url field (not url) + if hasattr(config_type, "model_fields"): + fields = config_type.model_fields + + # Should NOT have 'url' field (old pattern) + if "url" in fields: + failures.append( + f"{provider_type}: Uses deprecated 'url' field instead of 'base_url'. " + f"Please rename to 'base_url' for consistency." + ) + + # Should have 'base_url' field with HttpUrl | None type + if "base_url" in fields: + field_info = fields["base_url"] + annotation = field_info.annotation + + # Check if it's HttpUrl or HttpUrl | None + # get_origin() returns Union for (X | Y), None for plain types + # get_args() returns the types inside Union, e.g. (HttpUrl, NoneType) + is_valid = False + if get_origin(annotation) is not None: # It's a Union/Optional + if HttpUrl in get_args(annotation): + is_valid = True + elif annotation == HttpUrl: # Plain HttpUrl without | None + is_valid = True + + if not is_valid: + failures.append( + f"{provider_type}: base_url field has incorrect type annotation. 
" + f"Expected 'HttpUrl | None', got '{annotation}'" + ) + + except Exception as e: + failures.append(f"{provider_type}: Error checking URL standardization: {str(e)}") + + if failures: + pytest.fail("URL standardization violations found:\n" + "\n".join(f" - {f}" for f in failures)) From 4e9633f7c35e00607e1c5e75b2e14fbc97cff6b8 Mon Sep 17 00:00:00 2001 From: Anik Date: Wed, 19 Nov 2025 13:04:24 -0500 Subject: [PATCH 47/62] feat: Make Safety API an optional dependency for meta-reference agents provider (#4169) # What does this PR do? Change Safety API from required to optional dependency, following the established pattern used for other optional dependencies in Llama Stack. The provider now starts successfully without Safety API configured. Requests that explicitly include guardrails will receive a clear error message when Safety API is unavailable. This enables local development and testing without Safety API while maintaining clear error messages when guardrail features are requested. Closes #4165 Signed-off-by: Anik Bhattacharjee ## Test Plan 1. New unit tests added in `tests/unit/providers/agents/meta_reference/test_safety_optional.py` 2. Integration tests performed with the files in https://gist.github.com/anik120/c33cef497ec7085e1fe2164e0705b8d6 (i) test with `test_integration_no_safety_fail.yaml`: Config WITHOUT Safety API, should fail with helpful error since `required_safety_api` is `true` by default ``` $ uv run llama stack run test_integration_no_safety_fail.yaml 2>&1 | grep -B 5 -A 15 "ValueError.*Safety\|Safety API is required" File "/Users/anbhatta/go/src/github.com/llamastack/llama-stack/src/llama_stack/providers/inline/agents/meta_reference /__init__.py", line 27, in get_provider_impl raise ValueError( ...<9 lines>... ) ValueError: Safety API is required but not configured. To run without safety checks, explicitly set in your configuration: providers: agents: - provider_id: meta-reference provider_type: inline::meta-reference config: require_safety_api: false Warning: This disables all safety guardrails for this agents provider. ``` (ii) test with `test_integration_no_safety_works.yaml` Config WITHOUT Safety API, **but** `require_safety_api=false` is explicitly set, should succeed ``` $ uv run llama stack run test_integration_no_safety_works.yaml INFO 2025-11-16 09:49:10,044 llama_stack.cli.stack.run:169 cli: Using run configuration: /Users/anbhatta/go/src/github.com/llamastack/llama-stack/test_integration_no_safety_works.yaml INFO 2025-11-16 09:49:10,052 llama_stack.cli.stack.run:228 cli: HTTPS enabled with certificates: Key: None Cert: None . . . INFO 2025-11-16 09:49:38,528 llama_stack.core.stack:495 core: starting registry refresh task INFO 2025-11-16 09:49:38,534 uvicorn.error:62 uncategorized: Application startup complete. 
INFO 2025-11-16 09:49:38,535 uvicorn.error:216 uncategorized: Uvicorn running on http://0.0.0.0:8321 (Press CTRL+C ``` Signed-off-by: Anik Bhattacharjee Signed-off-by: Anik Bhattacharjee --- .../inline/agents/meta_reference/__init__.py | 2 +- .../inline/agents/meta_reference/agents.py | 2 +- .../responses/openai_responses.py | 10 +- .../meta_reference/responses/streaming.py | 3 +- .../agents/meta_reference/responses/utils.py | 6 +- src/llama_stack/providers/registry/agents.py | 4 +- .../meta_reference/test_safety_optional.py | 206 ++++++++++++++++++ 7 files changed, 227 insertions(+), 6 deletions(-) create mode 100644 tests/unit/providers/agents/meta_reference/test_safety_optional.py diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py index 91287617a..b3fb814e3 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -23,7 +23,7 @@ async def get_provider_impl( config, deps[Api.inference], deps[Api.vector_io], - deps[Api.safety], + deps.get(Api.safety), deps[Api.tool_runtime], deps[Api.tool_groups], deps[Api.conversations], diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index ba83a9576..2d5aa6c04 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -41,7 +41,7 @@ class MetaReferenceAgentsImpl(Agents): config: MetaReferenceAgentsImplConfig, inference_api: Inference, vector_io_api: VectorIO, - safety_api: Safety, + safety_api: Safety | None, tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, conversations_api: Conversations, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 7e080a675..11bfb1417 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -67,7 +67,7 @@ class OpenAIResponsesImpl: tool_runtime_api: ToolRuntime, responses_store: ResponsesStore, vector_io_api: VectorIO, # VectorIO - safety_api: Safety, + safety_api: Safety | None, conversations_api: Conversations, ): self.inference_api = inference_api @@ -273,6 +273,14 @@ class OpenAIResponsesImpl: guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] + # Validate that Safety API is available if guardrails are requested + if guardrail_ids and self.safety_api is None: + raise ValueError( + "Cannot process guardrails: Safety API is not configured.\n\n" + "To use guardrails, ensure the Safety API is configured in your stack, or remove " + "the 'guardrails' parameter from your request." 
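+                # Deliberately fail fast here, before any model call: a request that
+                # explicitly asks for guardrails must not proceed unchecked when the
+                # Safety API is absent (run_guardrails only skips as a defensive fallback).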
+ ) + if conversation is not None: if previous_response_id is not None: raise ValueError( diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index cdbd87244..0ef74f1f1 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -66,6 +66,7 @@ from llama_stack_api import ( OpenAIResponseUsage, OpenAIResponseUsageInputTokensDetails, OpenAIResponseUsageOutputTokensDetails, + Safety, WebSearchToolTypes, ) @@ -111,7 +112,7 @@ class StreamingResponseOrchestrator: max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class instructions: str | None, - safety_api, + safety_api: Safety | None, guardrail_ids: list[str] | None = None, prompt: OpenAIResponsePrompt | None = None, parallel_tool_calls: bool | None = None, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 943bbae41..25460bcfe 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -320,11 +320,15 @@ def is_function_tool_call( return False -async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None: +async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None: """Run guardrails against messages and return violation message if blocked.""" if not messages: return None + # If safety API is not available, skip guardrails + if safety_api is None: + return None + # Look up shields to get their provider_resource_id (actual model ID) model_ids = [] # TODO: list_shields not in Safety interface but available at runtime via API routing diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py index 2c68750a6..e85be99d6 100644 --- a/src/llama_stack/providers/registry/agents.py +++ b/src/llama_stack/providers/registry/agents.py @@ -30,12 +30,14 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", api_dependencies=[ Api.inference, - Api.safety, Api.vector_io, Api.tool_runtime, Api.tool_groups, Api.conversations, ], + optional_api_dependencies=[ + Api.safety, + ], description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ] diff --git a/tests/unit/providers/agents/meta_reference/test_safety_optional.py b/tests/unit/providers/agents/meta_reference/test_safety_optional.py new file mode 100644 index 000000000..b48d38b29 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py @@ -0,0 +1,206 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Tests for making Safety API optional in meta-reference agents provider. + +This test suite validates the changes introduced to fix issue #4165, which +allows running the meta-reference agents provider without the Safety API. 
+Safety API is now an optional dependency, and errors are raised at request time +when guardrails are explicitly requested without Safety API configured. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from llama_stack.core.datatypes import Api +from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference +from llama_stack.providers.inline.agents.meta_reference import get_provider_impl +from llama_stack.providers.inline.agents.meta_reference.config import ( + AgentPersistenceConfig, + MetaReferenceAgentsImplConfig, +) +from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( + run_guardrails, +) + + +@pytest.fixture +def mock_persistence_config(): + """Create a mock persistence configuration.""" + return AgentPersistenceConfig( + agent_state=KVStoreReference( + backend="kv_default", + namespace="agents", + ), + responses=ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ), + ) + + +@pytest.fixture +def mock_deps(): + """Create mock dependencies for the agents provider.""" + # Create mock APIs + inference_api = AsyncMock() + vector_io_api = AsyncMock() + tool_runtime_api = AsyncMock() + tool_groups_api = AsyncMock() + conversations_api = AsyncMock() + + return { + Api.inference: inference_api, + Api.vector_io: vector_io_api, + Api.tool_runtime: tool_runtime_api, + Api.tool_groups: tool_groups_api, + Api.conversations: conversations_api, + } + + +class TestProviderInitialization: + """Test provider initialization with different safety API configurations.""" + + async def test_initialization_with_safety_api_present(self, mock_persistence_config, mock_deps): + """Test successful initialization when Safety API is configured.""" + config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config) + + # Add safety API to deps + safety_api = AsyncMock() + mock_deps[Api.safety] = safety_api + + # Mock the initialize method to avoid actual initialization + with patch( + "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize", + new_callable=AsyncMock, + ): + # Should not raise any exception + provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False) + assert provider is not None + + async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps): + """Test successful initialization when Safety API is not configured.""" + config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config) + + # Safety API is NOT in mock_deps - provider should still start + # Mock the initialize method to avoid actual initialization + with patch( + "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize", + new_callable=AsyncMock, + ): + # Should not raise any exception + provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False) + assert provider is not None + assert provider.safety_api is None + + +class TestGuardrailsFunctionality: + """Test run_guardrails function with optional safety API.""" + + async def test_run_guardrails_with_none_safety_api(self): + """Test that run_guardrails returns None when safety_api is None.""" + result = await run_guardrails(safety_api=None, messages="test message", guardrail_ids=["llama-guard"]) + assert result is None + + async def test_run_guardrails_with_empty_messages(self): + """Test that run_guardrails returns None for empty messages.""" + # Test with None safety API + result = await 
run_guardrails(safety_api=None, messages="", guardrail_ids=["llama-guard"]) + assert result is None + + # Test with mock safety API + mock_safety_api = AsyncMock() + result = await run_guardrails(safety_api=mock_safety_api, messages="", guardrail_ids=["llama-guard"]) + assert result is None + + async def test_run_guardrails_with_none_safety_api_ignores_guardrails(self): + """Test that guardrails are skipped when safety_api is None, even if guardrail_ids are provided.""" + # Should not raise exception, just return None + result = await run_guardrails( + safety_api=None, + messages="potentially harmful content", + guardrail_ids=["llama-guard", "content-filter"], + ) + assert result is None + + async def test_create_response_rejects_guardrails_without_safety_api(self, mock_persistence_config, mock_deps): + """Test that create_openai_response raises error when guardrails requested but Safety API unavailable.""" + from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( + OpenAIResponsesImpl, + ) + from llama_stack_api import ResponseGuardrailSpec + + # Create OpenAIResponsesImpl with no safety API + with patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"): + impl = OpenAIResponsesImpl( + inference_api=mock_deps[Api.inference], + tool_groups_api=mock_deps[Api.tool_groups], + tool_runtime_api=mock_deps[Api.tool_runtime], + responses_store=MagicMock(), + vector_io_api=mock_deps[Api.vector_io], + safety_api=None, # No Safety API + conversations_api=mock_deps[Api.conversations], + ) + + # Test with string guardrail + with pytest.raises(ValueError) as exc_info: + await impl.create_openai_response( + input="test input", + model="test-model", + guardrails=["llama-guard"], + ) + assert "Cannot process guardrails: Safety API is not configured" in str(exc_info.value) + + # Test with ResponseGuardrailSpec + with pytest.raises(ValueError) as exc_info: + await impl.create_openai_response( + input="test input", + model="test-model", + guardrails=[ResponseGuardrailSpec(type="llama-guard")], + ) + assert "Cannot process guardrails: Safety API is not configured" in str(exc_info.value) + + async def test_create_response_succeeds_without_guardrails_and_no_safety_api( + self, mock_persistence_config, mock_deps + ): + """Test that create_openai_response works when no guardrails requested and Safety API unavailable.""" + from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( + OpenAIResponsesImpl, + ) + + # Create OpenAIResponsesImpl with no safety API + with ( + patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"), + patch.object(OpenAIResponsesImpl, "_create_streaming_response", new_callable=AsyncMock) as mock_stream, + ): + # Mock the streaming response to return a simple async generator + async def mock_generator(): + yield MagicMock() + + mock_stream.return_value = mock_generator() + + impl = OpenAIResponsesImpl( + inference_api=mock_deps[Api.inference], + tool_groups_api=mock_deps[Api.tool_groups], + tool_runtime_api=mock_deps[Api.tool_runtime], + responses_store=MagicMock(), + vector_io_api=mock_deps[Api.vector_io], + safety_api=None, # No Safety API + conversations_api=mock_deps[Api.conversations], + ) + + # Should not raise when no guardrails requested + # Note: This will still fail later in execution due to mocking, but should pass the validation + try: + await impl.create_openai_response( + input="test input", + model="test-model", + 
guardrails=None,  # No guardrails
+                )
+            except Exception as e:
+                # Ensure the error is NOT about missing Safety API
+                assert "Cannot process guardrails: Safety API is not configured" not in str(e)

From 40b11efac44c9ac7fcc56d4f1f93f0d92f45f8c2 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 19 Nov 2025 10:07:53 -0800
Subject: [PATCH 48/62] feat(tests): add TypeScript client integration test
 support (#4185)

Integration tests can now validate the TypeScript SDK alongside Python tests
when running against server-mode stacks. Currently, this only adds a _small_
number of tests. We should extend only if truly needed -- this smoke check may
be sufficient.

When `RUN_CLIENT_TS_TESTS=1` is set, the test script runs TypeScript tests
after Python tests pass. Tests are mapped via
`tests/integration/client-typescript/suites.json` which defines which
TypeScript test files correspond to each Python suite/setup combination.

The fact that we need exact "test_id"s (which are actually generated by
pytest) to be hardcoded inside the TypeScript tests (so we hit the recorded
paths) is a big smell and it might become grating, but maybe the benefit is
worth it if we keep this test suite _small_ and targeted.

## Test Plan

Run with TypeScript tests enabled:
```bash
OPENAI_API_KEY=dummy RUN_CLIENT_TS_TESTS=1 \
  scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt
```
---
 .../setup-typescript-client/action.yml | 35 +
 .github/workflows/integration-tests.yml | 16 +
 .gitignore | 2 +
 scripts/get_setup_env.py | 24 +-
 scripts/integration-tests.sh | 76 +
 tests/integration/README.md | 20 +
 .../__tests__/inference.test.ts | 104 +
 .../__tests__/responses.test.ts | 132 +
 .../jest.integration.config.js | 31 +
 .../client-typescript/package-lock.json | 5507 +++++++++++++++++
 .../client-typescript/package.json | 18 +
 .../client-typescript/run-tests.js | 63 +
 tests/integration/client-typescript/setup.ts | 162 +
 .../integration/client-typescript/suites.json | 12 +
 .../client-typescript/tsconfig.json | 16 +
 15 files changed, 6208 insertions(+), 10 deletions(-)
 create mode 100644 .github/actions/setup-typescript-client/action.yml
 create mode 100644 tests/integration/client-typescript/__tests__/inference.test.ts
 create mode 100644 tests/integration/client-typescript/__tests__/responses.test.ts
 create mode 100644 tests/integration/client-typescript/jest.integration.config.js
 create mode 100644 tests/integration/client-typescript/package-lock.json
 create mode 100644 tests/integration/client-typescript/package.json
 create mode 100755 tests/integration/client-typescript/run-tests.js
 create mode 100644 tests/integration/client-typescript/setup.ts
 create mode 100644 tests/integration/client-typescript/suites.json
 create mode 100644 tests/integration/client-typescript/tsconfig.json

diff --git a/.github/actions/setup-typescript-client/action.yml b/.github/actions/setup-typescript-client/action.yml
new file mode 100644
index 000000000..8b78ba70c
--- /dev/null
+++ b/.github/actions/setup-typescript-client/action.yml
@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{
inputs.client-version == 'latest' }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: llamastack/llama-stack-client-typescript + ref: main + path: .ts-client-checkout + + - name: Set TS_CLIENT_PATH + id: set-path + shell: bash + run: | + if [ "${{ inputs.client-version }}" = "latest" ]; then + echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT + elif [ "${{ inputs.client-version }}" = "published" ]; then + echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT + else + echo "::error::Invalid client-version: ${{ inputs.client-version }}" + exit 1 + fi diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 71c7933b4..8073f6a15 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -93,11 +93,27 @@ jobs: suite: ${{ matrix.config.suite }} inference-mode: 'replay' + - name: Setup Node.js for TypeScript client tests + if: ${{ matrix.client == 'server' }} + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + with: + node-version: '20' + cache: 'npm' + cache-dependency-path: tests/integration/client-typescript/package-lock.json + + - name: Setup TypeScript client + if: ${{ matrix.client == 'server' }} + id: setup-ts-client + uses: ./.github/actions/setup-typescript-client + with: + client-version: ${{ matrix.client-version }} + - name: Run tests if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/run-and-record-tests env: OPENAI_API_KEY: dummy + TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }} with: stack-config: >- ${{ matrix.config.stack_config diff --git a/.gitignore b/.gitignore index f5ca450b2..0d8fd5a2f 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ docs/static/imported-files/ docs/docs/api-deprecated/ docs/docs/api-experimental/ docs/docs/api/ +tests/integration/client-typescript/node_modules/ +.ts-client-checkout/ diff --git a/scripts/get_setup_env.py b/scripts/get_setup_env.py index fad601e76..755cfefea 100755 --- a/scripts/get_setup_env.py +++ b/scripts/get_setup_env.py @@ -16,16 +16,16 @@ import sys from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS -def get_setup_env_vars(setup_name, suite_name=None): +def get_setup_config(setup_name, suite_name=None): """ - Get environment variables for a setup, with optional suite default fallback. + Get full configuration (env vars + defaults) for a setup. 
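+
+    Example shape (hypothetical values): {"env": {"OLLAMA_URL": "..."}, "defaults": {"text_model": "..."}}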
Args: setup_name: Name of the setup (e.g., 'ollama', 'gpt') suite_name: Optional suite name to get default setup if setup_name is None Returns: - Dictionary of environment variables + Dictionary with 'env' and 'defaults' keys """ # If no setup specified, try to get default from suite if not setup_name and suite_name: @@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None): setup_name = suite.default_setup if not setup_name: - return {} + return {"env": {}, "defaults": {}} setup = SETUP_DEFINITIONS.get(setup_name) if not setup: @@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None): ) sys.exit(1) - return setup.env + return {"env": setup.env, "defaults": setup.defaults} def main(): - parser = argparse.ArgumentParser(description="Extract environment variables from a test setup") + parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup") parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)") parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided") parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)") args = parser.parse_args() - env_vars = get_setup_env_vars(args.setup, args.suite) + config = get_setup_config(args.setup, args.suite) if args.format == "bash": - # Output as bash export statements - for key, value in env_vars.items(): + # Output env vars as bash export statements + for key, value in config["env"].items(): print(f"export {key}='{value}'") + # Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix + for key, value in config["defaults"].items(): + env_key = f"LLAMA_STACK_TEST_{key.upper()}" + print(f"export {env_key}='{value}'") elif args.format == "json": import json - print(json.dumps(env_vars)) + print(json.dumps(config)) if __name__ == "__main__": diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 8b0002125..20ecd0c4d 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -181,6 +181,10 @@ echo "$SETUP_ENV" eval "$SETUP_ENV" echo "" +# Export suite and setup names for TypeScript tests +export LLAMA_STACK_TEST_SUITE="$TEST_SUITE" +export LLAMA_STACK_TEST_SETUP="$TEST_SETUP" + ROOT_DIR="$THIS_DIR/.." cd $ROOT_DIR @@ -212,6 +216,71 @@ find_available_port() { return 1 } +run_client_ts_tests() { + if ! command -v npm &>/dev/null; then + echo "npm could not be found; ensure Node.js is installed" + return 1 + fi + + pushd tests/integration/client-typescript >/dev/null + + # Determine if TS_CLIENT_PATH is a directory path or an npm version + if [[ -d "$TS_CLIENT_PATH" ]]; then + # It's a directory path - use local checkout + if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then + echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)" + popd >/dev/null + return 1 + fi + echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH" + + # Build the TypeScript client first + echo "Building TypeScript client..." 
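+        # The local checkout ships TypeScript sources, so compile it here before
+        # linking the built package into the test project via `npm install` below.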
+ pushd "$TS_CLIENT_PATH" >/dev/null + npm install --silent + npm run build --silent + popd >/dev/null + + # Install other dependencies first + if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + npm ci --silent + else + npm install --silent + fi + + # Then install the client from local directory + echo "Installing llama-stack-client from: $TS_CLIENT_PATH" + npm install "$TS_CLIENT_PATH" --silent + else + # It's an npm version specifier - install from npm + echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm" + if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + npm ci --silent + npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent + else + npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent + fi + fi + + # Verify installation + echo "Verifying llama-stack-client installation..." + if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then + echo "✅ llama-stack-client successfully installed" + npm list llama-stack-client + else + echo "❌ llama-stack-client not found in node_modules" + echo "Installed packages:" + npm list --depth=0 + popd >/dev/null + return 1 + fi + + echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)" + npm test + + popd >/dev/null +} + # Start Llama Stack Server if needed if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then # Find an available port for the server @@ -221,6 +290,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then exit 1 fi export LLAMA_STACK_PORT + export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT" stop_server() { @@ -298,6 +368,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then exit 1 fi export LLAMA_STACK_PORT + export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT" echo "=== Building Docker Image for distribution: $DISTRO ===" @@ -506,5 +577,10 @@ else exit 1 fi +# Run TypeScript client tests if TS_CLIENT_PATH is set +if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then + run_client_ts_tests +fi + echo "" echo "=== Integration Tests Complete ===" diff --git a/tests/integration/README.md b/tests/integration/README.md index f581073ae..3559b785c 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -211,3 +211,23 @@ def test_asymmetric_embeddings(llama_stack_client, embedding_model_id): assert query_response.embeddings is not None ``` + +## TypeScript Client Replays + +TypeScript SDK tests can run alongside Python tests when testing against `server:` stacks. Set `TS_CLIENT_PATH` to the path or version of `llama-stack-client-typescript` to enable: + +```bash +# Use published npm package (responses suite) +TS_CLIENT_PATH=^0.3.2 scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt + +# Use local checkout from ~/.cache (recommended for development) +git clone https://github.com/llamastack/llama-stack-client-typescript.git ~/.cache/llama-stack-client-typescript +TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt + +# Run base suite with TypeScript tests +TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup ollama +``` + +TypeScript tests run immediately after Python tests pass, using the same replay fixtures. 
The mapping between Python suites/setups and TypeScript test files is defined in `tests/integration/client-typescript/suites.json`. + +If `TS_CLIENT_PATH` is unset, TypeScript tests are skipped entirely. diff --git a/tests/integration/client-typescript/__tests__/inference.test.ts b/tests/integration/client-typescript/__tests__/inference.test.ts new file mode 100644 index 000000000..b0734fed7 --- /dev/null +++ b/tests/integration/client-typescript/__tests__/inference.test.ts @@ -0,0 +1,104 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the terms described in the LICENSE file in +// the root directory of this source tree. + +/** + * Integration tests for Inference API (Chat Completions). + * Ported from: llama-stack/tests/integration/inference/test_openai_completion.py + * + * IMPORTANT: Test cases must match EXACTLY with Python tests to use recorded API responses. + */ + +import { createTestClient, requireTextModel } from '../setup'; + +describe('Inference API - Chat Completions', () => { + // Test cases matching llama-stack/tests/integration/test_cases/inference/chat_completion.json + const chatCompletionTestCases = [ + { + id: 'non_streaming_01', + question: 'Which planet do humans live on?', + expected: 'earth', + testId: + 'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]', + }, + { + id: 'non_streaming_02', + question: 'Which planet has rings around it with a name starting with letter S?', + expected: 'saturn', + testId: + 'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]', + }, + ]; + + const streamingTestCases = [ + { + id: 'streaming_01', + question: "What's the name of the Sun in latin?", + expected: 'sol', + testId: + 'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]', + }, + { + id: 'streaming_02', + question: 'What is the name of the US captial?', + expected: 'washington', + testId: + 'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_02]', + }, + ]; + + test.each(chatCompletionTestCases)( + 'chat completion non-streaming: $id', + async ({ question, expected, testId }) => { + const client = createTestClient(testId); + const textModel = requireTextModel(); + + const response = await client.chat.completions.create({ + model: textModel, + messages: [ + { + role: 'user', + content: question, + }, + ], + stream: false, + }); + + // Non-streaming responses have choices with message property + const choice = response.choices[0]; + expect(choice).toBeDefined(); + if (!choice || !('message' in choice)) { + throw new Error('Expected non-streaming response with message'); + } + const content = choice.message.content; + expect(content).toBeDefined(); + const messageContent = typeof content === 'string' ? 
content.toLowerCase().trim() : ''; + expect(messageContent.length).toBeGreaterThan(0); + expect(messageContent).toContain(expected.toLowerCase()); + }, + ); + + test.each(streamingTestCases)('chat completion streaming: $id', async ({ question, expected, testId }) => { + const client = createTestClient(testId); + const textModel = requireTextModel(); + + const stream = await client.chat.completions.create({ + model: textModel, + messages: [{ role: 'user', content: question }], + stream: true, + }); + + const streamedContent: string[] = []; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) { + streamedContent.push(chunk.choices[0].delta.content); + } + } + + expect(streamedContent.length).toBeGreaterThan(0); + const fullContent = streamedContent.join('').toLowerCase().trim(); + expect(fullContent).toContain(expected.toLowerCase()); + }); +}); diff --git a/tests/integration/client-typescript/__tests__/responses.test.ts b/tests/integration/client-typescript/__tests__/responses.test.ts new file mode 100644 index 000000000..0fc2a3245 --- /dev/null +++ b/tests/integration/client-typescript/__tests__/responses.test.ts @@ -0,0 +1,132 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the terms described in the LICENSE file in +// the root directory of this source tree. + +/** + * Integration tests for Responses API. + * Ported from: llama-stack/tests/integration/responses/test_basic_responses.py + * + * IMPORTANT: Test cases and IDs must match EXACTLY with Python tests to use recorded API responses. + */ + +import { createTestClient, requireTextModel, getResponseOutputText } from '../setup'; + +describe('Responses API - Basic', () => { + // Test cases matching llama-stack/tests/integration/responses/fixtures/test_cases.py + const basicTestCases = [ + { + id: 'earth', + input: 'Which planet do humans live on?', + expected: 'earth', + // Use client_with_models fixture to match non-streaming recordings + testId: + 'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-earth]', + }, + { + id: 'saturn', + input: 'Which planet has rings around it with a name starting with letter S?', + expected: 'saturn', + testId: + 'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-saturn]', + }, + ]; + + test.each(basicTestCases)('non-streaming basic response: $id', async ({ input, expected, testId }) => { + // Create client with test_id for all requests + const client = createTestClient(testId); + const textModel = requireTextModel(); + + // Create a response + const response = await client.responses.create({ + model: textModel, + input, + stream: false, + }); + + // Verify response has content + const outputText = getResponseOutputText(response).toLowerCase().trim(); + expect(outputText.length).toBeGreaterThan(0); + expect(outputText).toContain(expected.toLowerCase()); + + // Verify usage is reported + expect(response.usage).toBeDefined(); + expect(response.usage!.input_tokens).toBeGreaterThan(0); + expect(response.usage!.output_tokens).toBeGreaterThan(0); + expect(response.usage!.total_tokens).toBe(response.usage!.input_tokens + response.usage!.output_tokens); + + // Verify stored response matches + const retrievedResponse = await client.responses.retrieve(response.id); + 
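+    // The retrieval goes through the same pinned-test-ID client, so the stored
+    // response is expected to replay with output text identical to the create() call.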
expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(response)); + + // Test follow-up with previous_response_id + const nextResponse = await client.responses.create({ + model: textModel, + input: 'Repeat your previous response in all caps.', + previous_response_id: response.id, + }); + const nextOutputText = getResponseOutputText(nextResponse).trim(); + expect(nextOutputText).toContain(expected.toUpperCase()); + }); + + test.each(basicTestCases)('streaming basic response: $id', async ({ input, expected, testId }) => { + // Modify test_id for streaming variant + const streamingTestId = testId.replace( + 'test_response_non_streaming_basic', + 'test_response_streaming_basic', + ); + const client = createTestClient(streamingTestId); + const textModel = requireTextModel(); + + // Create a streaming response + const stream = await client.responses.create({ + model: textModel, + input, + stream: true, + }); + + const events: any[] = []; + let responseId = ''; + + for await (const chunk of stream) { + events.push(chunk); + + if (chunk.type === 'response.created') { + // Verify response.created is the first event + expect(events.length).toBe(1); + expect(chunk.response.status).toBe('in_progress'); + responseId = chunk.response.id; + } else if (chunk.type === 'response.completed') { + // Verify response.completed comes after response.created + expect(events.length).toBeGreaterThanOrEqual(2); + expect(chunk.response.status).toBe('completed'); + expect(chunk.response.id).toBe(responseId); + + // Verify content quality + const outputText = getResponseOutputText(chunk.response).toLowerCase().trim(); + expect(outputText.length).toBeGreaterThan(0); + expect(outputText).toContain(expected.toLowerCase()); + + // Verify usage is reported + expect(chunk.response.usage).toBeDefined(); + expect(chunk.response.usage!.input_tokens).toBeGreaterThan(0); + expect(chunk.response.usage!.output_tokens).toBeGreaterThan(0); + expect(chunk.response.usage!.total_tokens).toBe( + chunk.response.usage!.input_tokens + chunk.response.usage!.output_tokens, + ); + } + } + + // Verify we got both events + expect(events.length).toBeGreaterThanOrEqual(2); + const firstEvent = events[0]; + const lastEvent = events[events.length - 1]; + expect(firstEvent.type).toBe('response.created'); + expect(lastEvent.type).toBe('response.completed'); + + // Verify stored response matches streamed response + const retrievedResponse = await client.responses.retrieve(responseId); + expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(lastEvent.response)); + }); +}); diff --git a/tests/integration/client-typescript/jest.integration.config.js b/tests/integration/client-typescript/jest.integration.config.js new file mode 100644 index 000000000..769bd177a --- /dev/null +++ b/tests/integration/client-typescript/jest.integration.config.js @@ -0,0 +1,31 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the terms described in the LICENSE file in +// the root directory of this source tree. 
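+//
+// ESM note: ts-jest treats .ts files as ES modules here, and moduleNameMapper
+// strips the ".js" suffix from relative imports so they resolve to the .ts sources.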
+ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest/presets/default-esm', + testEnvironment: 'node', + extensionsToTreatAsEsm: ['.ts'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + }, + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + useESM: true, + tsconfig: { + module: 'ES2022', + moduleResolution: 'bundler', + }, + }, + ], + }, + testMatch: ['/__tests__/**/*.test.ts'], + setupFilesAfterEnv: ['/setup.ts'], + testTimeout: 60000, // 60 seconds (integration tests can be slow) + watchman: false, // Disable watchman to avoid permission issues +}; diff --git a/tests/integration/client-typescript/package-lock.json b/tests/integration/client-typescript/package-lock.json new file mode 100644 index 000000000..f118a07e3 --- /dev/null +++ b/tests/integration/client-typescript/package-lock.json @@ -0,0 +1,5507 @@ +{ + "name": "llama-stack-typescript-integration-tests", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "llama-stack-typescript-integration-tests", + "version": "0.0.1", + "dependencies": { + "llama-stack-client": "^0.3.2" + }, + "devDependencies": { + "@swc/core": "^1.3.102", + "@swc/jest": "^0.2.29", + "@types/jest": "^29.4.0", + "@types/node": "^20.0.0", + "jest": "^29.4.0", + "ts-jest": "^29.1.0", + "typescript": "^5.0.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", + "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.27.1", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.28.5.tgz", + "integrity": "sha512-6uFXyCayocRbqhZOB+6XcuZbkMNimwfVGFji8CTZnCzOHVGvDqzvitu1re2AU5LROliz7eQPhB8CpAMvnx9EjA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.5.tgz", + "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.5", + "@babel/helper-compilation-targets": "^7.27.2", + "@babel/helper-module-transforms": "^7.28.3", + "@babel/helpers": "^7.28.4", + "@babel/parser": "^7.28.5", + "@babel/template": "^7.27.2", + "@babel/traverse": "^7.28.5", + "@babel/types": "^7.28.5", + "@jridgewell/remapping": "^2.3.5", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.28.5.tgz", + "integrity": "sha512-3EwLFhZ38J4VyIP6WNtt2kUdW9dokXA9Cr4IVIFHuCpZ3H8/YFOl5JjZHisrn1fATPBmKKqXzDFvh9fUwHz6CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.28.5", + "@babel/types": "^7.28.5", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + 
"jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.27.2", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", + "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.27.2", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", + "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", + "integrity": "sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.28.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.3.tgz", + "integrity": "sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.27.1", + "@babel/helper-validator-identifier": "^7.27.1", + "@babel/traverse": "^7.28.3" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.27.1.tgz", + "integrity": "sha512-1gn1Up5YXka3YYAHGKpbideQ5Yjf1tDa9qYcgysz+cNCXukyLl6DjPXhD3VRwSb8c0J9tA4b2+rHEZtc6R0tlw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", + "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + 
"node_modules/@babel/helpers": { + "version": "7.28.4", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.4.tgz", + "integrity": "sha512-HFN59MmQXGHVyYadKLVumYsA9dBFun/ldYxipEjzA4196jpLZd8UjEEBLkbEkvfYreDqJhZxYAWFPtrfhNpj4w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.27.2", + "@babel/types": "^7.28.4" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.28.5.tgz", + "integrity": "sha512-KKBU1VGYR7ORr3At5HAtUQ+TV3SzRCXmA/8OdDZiLDBIZxVyzXuztPjfLd3BV1PRAQGCMWWSHYhL0F8d5uHBDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.28.5" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-syntax-async-generators": { + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz", + "integrity": "sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-bigint": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz", + "integrity": "sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-class-properties": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz", + "integrity": "sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.12.13" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-class-static-block": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-static-block/-/plugin-syntax-class-static-block-7.14.5.tgz", + "integrity": "sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.14.5" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-import-attributes": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-attributes/-/plugin-syntax-import-attributes-7.27.1.tgz", + "integrity": "sha512-oFT0FrKHgF53f4vOsZGi2Hh3I35PfSmVs4IBFLFj4dnafP+hIWDLg3VyKmUHfLoLHlyxY4C7DGtmHuJgn+IGww==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-import-meta": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz", + "integrity": 
"sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-json-strings": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz", + "integrity": "sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-jsx": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.27.1.tgz", + "integrity": "sha512-y8YTNIeKoyhGd9O0Jiyzyyqk8gdjnumGTQPsz0xOZOQ2RmkVJeZ1vmmfIvFEKqucBG6axJGBZDE/7iI5suUI/w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-logical-assignment-operators": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz", + "integrity": "sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-nullish-coalescing-operator": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz", + "integrity": "sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-numeric-separator": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz", + "integrity": "sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-object-rest-spread": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz", + "integrity": "sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-catch-binding": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz", + "integrity": 
"sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-chaining": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz", + "integrity": "sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-private-property-in-object": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-private-property-in-object/-/plugin-syntax-private-property-in-object-7.14.5.tgz", + "integrity": "sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.14.5" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-top-level-await": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz", + "integrity": "sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.14.5" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-typescript": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.27.1.tgz", + "integrity": "sha512-xfYCBMxveHrRMnAWl1ZlPXOZjzkN82THFvLhQhFXFt81Z5HnN+EtUkZhv/zcKpmT3fzmWZB0ywiBrbC3vogbwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/template": { + "version": "7.27.2", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.2.tgz", + "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/parser": "^7.27.2", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.28.5.tgz", + "integrity": "sha512-TCCj4t55U90khlYkVV/0TfkJkAkUg3jZFA3Neb7unZT8CPok7iiRfaX0F+WnqWqt7OxhOn0uBKXCw4lbL8W0aQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.5", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.28.5", + "@babel/template": "^7.27.2", + "@babel/types": "^7.28.5", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.28.5.tgz", + "integrity": 
"sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@bcoe/v8-coverage": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", + "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@istanbuljs/load-nyc-config": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", + "integrity": "sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "camelcase": "^5.3.1", + "find-up": "^4.1.0", + "get-package-type": "^0.1.0", + "js-yaml": "^3.13.1", + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/schema": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz", + "integrity": "sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/@jest/console": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/console/-/console-29.7.0.tgz", + "integrity": "sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/console/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/console/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/console/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/core": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/core/-/core-29.7.0.tgz", + "integrity": 
"sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/reporters": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-changed-files": "^29.7.0", + "jest-config": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-resolve-dependencies": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "jest-watcher": "^29.7.0", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/core/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/core/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/core/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/core/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/create-cache-key-function": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/@jest/create-cache-key-function/-/create-cache-key-function-30.2.0.tgz", + "integrity": "sha512-44F4l4Enf+MirJN8X/NhdGkl71k5rBYiwdVlo4HxOwbu0sHV8QKrGEedb1VUU4K3W7fBKE0HGfbn7eZm0Ti3zg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "30.2.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/@jest/environment": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz", + "integrity": 
"sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/environment/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/environment/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/environment/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "expect": "^29.7.0", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/expect-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.7.0.tgz", + "integrity": "sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jest-get-type": "^29.6.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/fake-timers": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-29.7.0.tgz", + "integrity": "sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "@sinonjs/fake-timers": "^10.0.2", + "@types/node": "*", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/fake-timers/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + 
"node_modules/@jest/fake-timers/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/fake-timers/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/globals": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-29.7.0.tgz", + "integrity": "sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/types": "^29.6.3", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/globals/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/globals/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/globals/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/pattern": { + "version": "30.0.1", + "resolved": "https://registry.npmjs.org/@jest/pattern/-/pattern-30.0.1.tgz", + "integrity": "sha512-gWp7NfQW27LaBQz3TITS8L7ZCQ0TLvtmI//4OwlQRx4rnWxcPNIYjxZpDcN4+UlGxgm3jS5QPz8IPTCkb59wZA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "jest-regex-util": "30.0.1" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/@jest/reporters": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-29.7.0.tgz", + "integrity": "sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@bcoe/v8-coverage": "^0.2.3", + 
"@jest/console": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "@types/node": "*", + "chalk": "^4.0.0", + "collect-v8-coverage": "^1.0.0", + "exit": "^0.1.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "istanbul-lib-coverage": "^3.0.0", + "istanbul-lib-instrument": "^6.0.0", + "istanbul-lib-report": "^3.0.0", + "istanbul-lib-source-maps": "^4.0.0", + "istanbul-reports": "^3.1.3", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "slash": "^3.0.0", + "string-length": "^4.0.1", + "strip-ansi": "^6.0.0", + "v8-to-istanbul": "^9.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/reporters/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/reporters/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/reporters/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/schemas": { + "version": "30.0.5", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-30.0.5.tgz", + "integrity": "sha512-DmdYgtezMkh3cpU8/1uyXakv3tJRcmcXxBOcO0tbaozPwpmh4YMsnWrQm9ZmZMfa5ocbxzbFk6O4bDPEc/iAnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.34.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/@jest/source-map": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/source-map/-/source-map-29.6.3.tgz", + "integrity": "sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.18", + "callsites": "^3.0.0", + "graceful-fs": "^4.2.9" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-result/-/test-result-29.7.0.tgz", + "integrity": "sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/types": "^29.6.3", + 
"@types/istanbul-lib-coverage": "^2.0.0", + "collect-v8-coverage": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/test-sequencer": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-29.7.0.tgz", + "integrity": "sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/test-result": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/transform/-/transform-29.7.0.tgz", + "integrity": "sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.11.6", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "babel-plugin-istanbul": "^6.1.1", + "chalk": "^4.0.0", + "convert-source-map": "^2.0.0", + "fast-json-stable-stringify": "^2.1.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "micromatch": "^4.0.4", + "pirates": "^4.0.4", + "slash": "^3.0.0", + "write-file-atomic": "^4.0.2" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": 
"sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jest/transform/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/types": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-30.2.0.tgz", + "integrity": "sha512-H9xg1/sfVvyfU7o3zMfBEjQ1gcsdeTMgqHoYdN79tuLqfTtuu7WckRA1R5whDwOzxaZAeMKTYWqP+WCAi0CHsg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/pattern": "30.0.1", + "@jest/schemas": "30.0.5", + "@types/istanbul-lib-coverage": "^2.0.6", + "@types/istanbul-reports": "^3.0.4", + "@types/node": "*", + "@types/yargs": "^17.0.33", + "chalk": "^4.1.2" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@sinclair/typebox": { + "version": "0.34.41", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.41.tgz", + "integrity": "sha512-6gS8pZzSXdyRHTIqoqSVknxolr1kzfy4/CeDnrzsVz8TTIWUbOBr6gnzOmTYJ3eXQNh4IYHIGi5aIL7sOZ2G/g==", + "dev": true, + "license": "MIT" + }, + "node_modules/@sinonjs/commons": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.1.tgz", + "integrity": "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "type-detect": "4.0.8" + } + }, + "node_modules/@sinonjs/fake-timers": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.3.0.tgz", + "integrity": "sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@sinonjs/commons": "^3.0.0" + } + }, + "node_modules/@swc/core": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core/-/core-1.15.2.tgz", + "integrity": "sha512-OQm+yJdXxvSjqGeaWhP6Ia264ogifwAO7Q12uTDVYj/Ks4jBTI4JknlcjDRAXtRhqbWsfbZyK/5RtuIPyptk3w==", + "dev": true, + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "@swc/counter": "^0.1.3", + "@swc/types": "^0.1.25" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/swc" + }, + "optionalDependencies": { + "@swc/core-darwin-arm64": "1.15.2", + "@swc/core-darwin-x64": "1.15.2", + "@swc/core-linux-arm-gnueabihf": "1.15.2", + "@swc/core-linux-arm64-gnu": "1.15.2", + "@swc/core-linux-arm64-musl": "1.15.2", + "@swc/core-linux-x64-gnu": "1.15.2", + "@swc/core-linux-x64-musl": "1.15.2", + "@swc/core-win32-arm64-msvc": "1.15.2", + "@swc/core-win32-ia32-msvc": "1.15.2", + "@swc/core-win32-x64-msvc": "1.15.2" + }, + "peerDependencies": { + "@swc/helpers": ">=0.5.17" + }, + "peerDependenciesMeta": { + "@swc/helpers": { + "optional": true + } + } + }, + "node_modules/@swc/core-darwin-arm64": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-darwin-arm64/-/core-darwin-arm64-1.15.2.tgz", + "integrity": "sha512-Ghyz4RJv4zyXzrUC1B2MLQBbppIB5c4jMZJybX2ebdEQAvryEKp3gq1kBksCNsatKGmEgXul88SETU19sMWcrw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-darwin-x64": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-darwin-x64/-/core-darwin-x64-1.15.2.tgz", + "integrity": "sha512-7n/PGJOcL2QoptzL42L5xFFfXY5rFxLHnuz1foU+4ruUTG8x2IebGhtwVTpaDN8ShEv2UZObBlT1rrXTba15Zw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-linux-arm-gnueabihf": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.15.2.tgz", + "integrity": "sha512-ZUQVCfRJ9wimuxkStRSlLwqX4TEDmv6/J+E6FicGkQ6ssLMWoKDy0cAo93HiWt/TWEee5vFhFaSQYzCuBEGO6A==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=10" + } + }, + 
"node_modules/@swc/core-linux-arm64-gnu": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.15.2.tgz", + "integrity": "sha512-GZh3pYBmfnpQ+JIg+TqLuz+pM+Mjsk5VOzi8nwKn/m+GvQBsxD5ectRtxuWUxMGNG8h0lMy4SnHRqdK3/iJl7A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-linux-arm64-musl": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.15.2.tgz", + "integrity": "sha512-5av6VYZZeneiYIodwzGMlnyVakpuYZryGzFIbgu1XP8wVylZxduEzup4eP8atiMDFmIm+s4wn8GySJmYqeJC0A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-linux-x64-gnu": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.15.2.tgz", + "integrity": "sha512-1nO/UfdCLuT/uE/7oB3EZgTeZDCIa6nL72cFEpdegnqpJVNDI6Qb8U4g/4lfVPkmHq2lvxQ0L+n+JdgaZLhrRA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-linux-x64-musl": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.15.2.tgz", + "integrity": "sha512-Ksfrb0Tx310kr+TLiUOvB/I80lyZ3lSOp6cM18zmNRT/92NB4mW8oX2Jo7K4eVEI2JWyaQUAFubDSha2Q+439A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-win32-arm64-msvc": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.15.2.tgz", + "integrity": "sha512-IzUb5RlMUY0r1A9IuJrQ7Tbts1wWb73/zXVXT8VhewbHGoNlBKE0qUhKMED6Tv4wDF+pmbtUJmKXDthytAvLmg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-win32-ia32-msvc": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.15.2.tgz", + "integrity": "sha512-kCATEzuY2LP9AlbU2uScjcVhgnCAkRdu62vbce17Ro5kxEHxYWcugkveyBRS3AqZGtwAKYbMAuNloer9LS/hpw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/core-win32-x64-msvc": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.15.2.tgz", + "integrity": "sha512-iJaHeYCF4jTn7OEKSa3KRiuVFIVYts8jYjNmCdyz1u5g8HRyTDISD76r8+ljEOgm36oviRQvcXaw6LFp1m0yyA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "Apache-2.0 AND MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@swc/counter": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", + "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@swc/jest": { + "version": "0.2.39", + "resolved": "https://registry.npmjs.org/@swc/jest/-/jest-0.2.39.tgz", + 
"integrity": "sha512-eyokjOwYd0Q8RnMHri+8/FS1HIrIUKK/sRrFp8c1dThUOfNeCWbLmBP1P5VsKdvmkd25JaH+OKYwEYiAYg9YAA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/create-cache-key-function": "^30.0.0", + "@swc/counter": "^0.1.3", + "jsonc-parser": "^3.2.0" + }, + "engines": { + "npm": ">= 7.0.0" + }, + "peerDependencies": { + "@swc/core": "*" + } + }, + "node_modules/@swc/types": { + "version": "0.1.25", + "resolved": "https://registry.npmjs.org/@swc/types/-/types-0.1.25.tgz", + "integrity": "sha512-iAoY/qRhNH8a/hBvm3zKj9qQ4oc2+3w1unPJa2XvTK3XjeLXtzcCingVPw/9e5mn1+0yPqxcBGp9Jf0pkfMb1g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@swc/counter": "^0.1.3" + } + }, + "node_modules/@types/babel__core": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.20.7", + "@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "node_modules/@types/babel__generator": { + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz", + "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__template": { + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__traverse": { + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz", + "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.28.2" + } + }, + "node_modules/@types/graceful-fs": { + "version": "4.1.9", + "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", + "integrity": "sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@types/istanbul-lib-coverage": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/istanbul-lib-report": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-coverage": "*" + } + }, + "node_modules/@types/istanbul-reports": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", 
+ "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-report": "*" + } + }, + "node_modules/@types/jest": { + "version": "29.5.14", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz", + "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, + "node_modules/@types/node": { + "version": "20.19.25", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz", + "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==", + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.4" + } + }, + "node_modules/@types/stack-utils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/yargs": { + "version": "17.0.35", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.35.tgz", + "integrity": "sha512-qUHkeCyQFxMXg79wQfTtfndEC+N9ZZg76HJftDJp+qH2tV7Gj4OJi7l+PiWwJ+pWtW8GwSmqsDj/oymhrTWXjg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agentkeepalive": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", + "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", + "license": "MIT", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, + "node_modules/ansi-escapes": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", + "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "type-fest": "^0.21.3" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": 
"sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/babel-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", + "integrity": "sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/transform": "^29.7.0", + "@types/babel__core": "^7.1.14", + "babel-plugin-istanbul": "^6.1.1", + "babel-preset-jest": "^29.6.3", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@babel/core": "^7.8.0" + } + }, + "node_modules/babel-plugin-istanbul": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-6.1.1.tgz", + "integrity": "sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@babel/helper-plugin-utils": "^7.0.0", + "@istanbuljs/load-nyc-config": "^1.0.0", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-instrument": "^5.0.4", + "test-exclude": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-istanbul/node_modules/istanbul-lib-instrument": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-5.2.1.tgz", + "integrity": "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@babel/core": "^7.12.3", + "@babel/parser": "^7.14.7", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^6.3.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-jest-hoist": { + "version": "29.6.3", + "resolved": 
"https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.6.3.tgz", + "integrity": "sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.3.3", + "@babel/types": "^7.3.3", + "@types/babel__core": "^7.1.14", + "@types/babel__traverse": "^7.0.6" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/babel-preset-current-node-syntax": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.2.0.tgz", + "integrity": "sha512-E/VlAEzRrsLEb2+dv8yp3bo4scof3l9nR4lrld+Iy5NyVqgVYUJnDAmunkhPMisRI32Qc4iRiz425d8vM++2fg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/plugin-syntax-async-generators": "^7.8.4", + "@babel/plugin-syntax-bigint": "^7.8.3", + "@babel/plugin-syntax-class-properties": "^7.12.13", + "@babel/plugin-syntax-class-static-block": "^7.14.5", + "@babel/plugin-syntax-import-attributes": "^7.24.7", + "@babel/plugin-syntax-import-meta": "^7.10.4", + "@babel/plugin-syntax-json-strings": "^7.8.3", + "@babel/plugin-syntax-logical-assignment-operators": "^7.10.4", + "@babel/plugin-syntax-nullish-coalescing-operator": "^7.8.3", + "@babel/plugin-syntax-numeric-separator": "^7.10.4", + "@babel/plugin-syntax-object-rest-spread": "^7.8.3", + "@babel/plugin-syntax-optional-catch-binding": "^7.8.3", + "@babel/plugin-syntax-optional-chaining": "^7.8.3", + "@babel/plugin-syntax-private-property-in-object": "^7.14.5", + "@babel/plugin-syntax-top-level-await": "^7.14.5" + }, + "peerDependencies": { + "@babel/core": "^7.0.0 || ^8.0.0-0" + } + }, + "node_modules/babel-preset-jest": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-29.6.3.tgz", + "integrity": "sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==", + "dev": true, + "license": "MIT", + "dependencies": { + "babel-plugin-jest-hoist": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/baseline-browser-mapping": { + "version": "2.8.29", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.29.tgz", + "integrity": "sha512-sXdt2elaVnhpDNRDz+1BDx1JQoJRuNk7oVlAlbGiFkLikHCAQiccexF/9e91zVi6RCgqspl04aP+6Cnl9zRLrA==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.js" + } + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": 
"sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.28.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.0.tgz", + "integrity": "sha512-tbydkR/CxfMwelN0vwdP/pLkDwyAASZ+VfWm4EOwlB6SWhx1sYnWLqo8N5j0rAzPfzfRaxt0mM/4wPU/Su84RQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "baseline-browser-mapping": "^2.8.25", + "caniuse-lite": "^1.0.30001754", + "electron-to-chromium": "^1.5.249", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.1.4" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/bs-logger": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/bs-logger/-/bs-logger-0.2.6.tgz", + "integrity": "sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-json-stable-stringify": "2.x" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/bser": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", + "integrity": "sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "node-int64": "^0.4.0" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001755", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001755.tgz", + "integrity": "sha512-44V+Jm6ctPj7R52Na4TLi3Zri4dWUljJd+RDm+j8LtNCc/ihLCT+X1TzoOAkRETEWqjuLnh9581Tl80FvK7jVA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, 
+ { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/char-regex": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", + "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/ci-info": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", + "integrity": "sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/cjs-module-lexer": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.4.3.tgz", + "integrity": "sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/co": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", + "integrity": "sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==", + "dev": true, + "license": "MIT", + "engines": { + "iojs": ">= 1.0.0", + "node": ">= 0.12.0" + } + }, + "node_modules/collect-v8-coverage": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.3.tgz", + "integrity": "sha512-1L5aqIkwPfiodaMgQunkF1zRhNqifHBmtbbbxcr6yVxxBnliw4TDOW6NxpO8DJLgJ16OT+Y4ztZqP6p/FtXnAw==", + "dev": true, + "license": "MIT" + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + 
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/create-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", + "integrity": "sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "prompts": "^2.0.1" + }, + "bin": { + "create-jest": "bin/create-jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/create-jest/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/create-jest/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/create-jest/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + 
"peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/dedent": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.7.0.tgz", + "integrity": "sha512-HGFtf8yhuhGhqO07SV79tRp+br4MnbdjeVxotpn1QBl30pcLLCQjX5b2295ll0fv8RKDKsmWYrl05usHM9CewQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "babel-plugin-macros": "^3.1.0" + }, + "peerDependenciesMeta": { + "babel-plugin-macros": { + "optional": true + } + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/detect-newline": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", + "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/diff-sequences": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", + "integrity": "sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.5.255", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.255.tgz", + "integrity": "sha512-Z9oIp4HrFF/cZkDPMpz2XSuVpc1THDpT4dlmATFlJUIBVCy9Vap5/rIXsASP1CscBacBqhabwh8vLctqBwEerQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/emittery": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/emittery/-/emittery-0.13.1.tgz", + "integrity": "sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sindresorhus/emittery?sponsor=1" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/error-ex": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", + "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", + "dev": true, + "license": "MIT", + "dependencies": 
{ + "is-arrayish": "^0.2.1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "license": "BSD-2-Clause", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } 
+ }, + "node_modules/exit": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", + "integrity": "sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/expect-utils": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true, + "license": "MIT" + }, + "node_modules/fb-watchman": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", + "integrity": "sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "bser": "2.1.1" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "license": "MIT", + "dependencies": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", + "license": "MIT" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "license": "MIT", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + 
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true, + "license": "ISC" + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-package-type": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", + "integrity": "sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/get-stream": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": 
"sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", + "dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/handlebars": { + "version": "4.7.8", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz", + "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "minimist": "^1.2.5", + "neo-async": "^2.6.2", + "source-map": "^0.6.1", + "wordwrap": "^1.0.0" + }, + "bin": { + "handlebars": "bin/handlebars" + }, + "engines": { + "node": ">=0.4.7" + }, + "optionalDependencies": { + "uglify-js": "^3.1.4" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/html-escaper": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", + "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", + "dev": true, + "license": "MIT" + }, + "node_modules/human-signals": { + "version": "2.1.0", + "resolved": 
"https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=10.17.0" + } + }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.0.0" + } + }, + "node_modules/import-local": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.2.0.tgz", + "integrity": "sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pkg-dir": "^4.2.0", + "resolve-cwd": "^3.0.0" + }, + "bin": { + "import-local-fixture": "fixtures/cli.js" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "dev": true, + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true, + "license": "MIT" + }, + "node_modules/is-core-module": { + "version": "2.16.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", + "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", + "dev": true, + "license": "MIT", + "dependencies": { + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-generator-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-2.1.0.tgz", + "integrity": 
"sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/istanbul-lib-coverage": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=8" + } + }, + "node_modules/istanbul-lib-instrument": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-6.0.3.tgz", + "integrity": "sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@babel/core": "^7.23.9", + "@babel/parser": "^7.23.9", + "@istanbuljs/schema": "^0.1.3", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-report": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", + "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "istanbul-lib-coverage": "^3.0.0", + "make-dir": "^4.0.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-source-maps": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.1.tgz", + "integrity": "sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "debug": "^4.1.1", + "istanbul-lib-coverage": "^3.0.0", + "source-map": "^0.6.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-reports": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.2.0.tgz", + 
"integrity": "sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "html-escaper": "^2.0.0", + "istanbul-lib-report": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", + "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/types": "^29.6.3", + "import-local": "^3.0.2", + "jest-cli": "^29.7.0" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-changed-files": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-29.7.0.tgz", + "integrity": "sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==", + "dev": true, + "license": "MIT", + "dependencies": { + "execa": "^5.0.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-circus/-/jest-circus-29.7.0.tgz", + "integrity": "sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "co": "^4.6.0", + "dedent": "^1.0.0", + "is-generator-fn": "^2.0.0", + "jest-each": "^29.7.0", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0", + "pretty-format": "^29.7.0", + "pure-rand": "^6.0.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": 
"sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-cli": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-29.7.0.tgz", + "integrity": "sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "create-jest": "^29.7.0", + "exit": "^0.1.2", + "import-local": "^3.0.2", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "yargs": "^17.3.1" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-cli/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-cli/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-cli/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-config": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-29.7.0.tgz", + "integrity": "sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.11.6", + "@jest/test-sequencer": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-jest": "^29.7.0", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "deepmerge": "^4.2.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-circus": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "micromatch": "^4.0.4", + "parse-json": "^5.2.0", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@types/node": "*", + "ts-node": ">=9.0.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "ts-node": { + "optional": true + } + } + }, + 
"node_modules/jest-config/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-config/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-config/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-config/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-diff": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.7.0.tgz", + "integrity": "sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^4.0.0", + "diff-sequences": "^29.6.3", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-docblock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-29.7.0.tgz", + "integrity": "sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "detect-newline": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-each": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-29.7.0.tgz", + "integrity": "sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "jest-util": "^29.7.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-each/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || 
>=18.0.0" + } + }, + "node_modules/jest-each/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-each/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-environment-node": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-29.7.0.tgz", + "integrity": "sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-environment-node/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-environment-node/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-environment-node/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-get-type": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.6.3.tgz", + "integrity": "sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-haste-map": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-29.7.0.tgz", + "integrity": "sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"@jest/types": "^29.6.3", + "@types/graceful-fs": "^4.1.3", + "@types/node": "*", + "anymatch": "^3.0.3", + "fb-watchman": "^2.0.0", + "graceful-fs": "^4.2.9", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "micromatch": "^4.0.4", + "walker": "^1.0.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "optionalDependencies": { + "fsevents": "^2.3.2" + } + }, + "node_modules/jest-haste-map/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-haste-map/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-haste-map/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-haste-map/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-leak-detector": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-29.7.0.tgz", + "integrity": "sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==", + "dev": true, + "license": "MIT", + "dependencies": { + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-matcher-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.7.0.tgz", + "integrity": "sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^4.0.0", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.7.0.tgz", + "integrity": "sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.6.3", + "@types/stack-utils": "^2.0.0", + 
"chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-mock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-29.7.0.tgz", + "integrity": "sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-mock/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-mock/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-mock/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-pnp-resolver": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz", + "integrity": 
"sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + }, + "peerDependencies": { + "jest-resolve": "*" + }, + "peerDependenciesMeta": { + "jest-resolve": { + "optional": true + } + } + }, + "node_modules/jest-regex-util": { + "version": "30.0.1", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-30.0.1.tgz", + "integrity": "sha512-jHEQgBXAgc+Gh4g0p3bCevgRCVRkB4VB70zhoAE48gxeSr1hfUOsM/C2WoJgVL7Eyg//hudYENbm3Ne+/dRVVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/jest-resolve": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve/-/jest-resolve-29.7.0.tgz", + "integrity": "sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-pnp-resolver": "^1.2.2", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "resolve": "^1.20.0", + "resolve.exports": "^2.0.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve-dependencies": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-29.7.0.tgz", + "integrity": "sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jest-regex-util": "^29.6.3", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve-dependencies/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-29.7.0.tgz", + "integrity": "sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/environment": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "graceful-fs": "^4.2.9", + "jest-docblock": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-leak-detector": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-resolve": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-util": "^29.7.0", + "jest-watcher": "^29.7.0", + "jest-worker": "^29.7.0", + "p-limit": "^3.1.0", + "source-map-support": "0.5.13" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, 
+ "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-runtime": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runtime/-/jest-runtime-29.7.0.tgz", + "integrity": "sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/globals": "^29.7.0", + "@jest/source-map": "^29.6.3", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "cjs-module-lexer": "^1.0.0", + "collect-v8-coverage": "^1.0.0", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0", + "strip-bom": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-runtime/node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": 
"sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.7.0.tgz", + "integrity": "sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.11.6", + "@babel/generator": "^7.7.2", + "@babel/plugin-syntax-jsx": "^7.7.2", + "@babel/plugin-syntax-typescript": "^7.7.2", + "@babel/types": "^7.3.3", + "@jest/expect-utils": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0", + "chalk": "^4.0.0", + "expect": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "natural-compare": "^1.4.0", + "pretty-format": "^29.7.0", + "semver": "^7.5.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-snapshot/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/jest-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.7.0.tgz", + "integrity": "sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-util/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": 
"https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-util/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-util/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-validate": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-29.7.0.tgz", + "integrity": "sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/types": "^29.6.3", + "camelcase": "^6.2.0", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "leven": "^3.1.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-validate/node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + 
"node_modules/jest-watcher": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-watcher/-/jest-watcher-29.7.0.tgz", + "integrity": "sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "jest-util": "^29.7.0", + "string-length": "^4.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-watcher/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-watcher/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-watcher/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/jest-worker": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-29.7.0.tgz", + "integrity": "sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "jest-util": "^29.7.0", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-worker/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/jest/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest/node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": 
"sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "3.14.2", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz", + "integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "dev": true, + "license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true, + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/jsonc-parser": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", + "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": 
"sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true, + "license": "MIT" + }, + "node_modules/llama-stack-client": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.2.tgz", + "integrity": "sha512-vzcnIN6k3sp7dhMXSnyrPSd82ACH/H3snj2uF6DgZwZCacKQNp2Y5XIT5qZZgoM1EUXbaxdVYFCeWD9yNCwatw==", + "license": "MIT", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + } + }, + "node_modules/llama-stack-client/node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/llama-stack-client/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, + "node_modules/locate-path": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^4.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/lodash.memoize": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/lodash.memoize/-/lodash.memoize-4.1.2.tgz", + "integrity": "sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==", + "dev": true, + "license": "MIT" + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/make-dir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz", + "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^7.5.3" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/makeerror": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz", + 
"integrity": "sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "tmpl": "1.0.5" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true, + "license": "MIT" + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true, + "license": "MIT" + }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": 
"https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "dev": true, + "license": "MIT" + }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native DOMException instead", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-int64": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", + "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==", + "dev": true, + "license": "MIT" + }, + "node_modules/node-releases": { + "version": "2.0.27", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", + "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/npm-run-path": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^2.2.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/p-locate/node_modules/p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-try": "^2.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-try": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true, + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": 
"sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pirates": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", + "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/pkg-dir": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", + "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "find-up": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pretty-format": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", + "integrity": "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jest/schemas": "^29.6.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/pretty-format/node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/pretty-format/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/prompts": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/pure-rand": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", + "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/dubzzz" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fast-check" + } + ], + "license": "MIT" + }, + "node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + 
"integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "dev": true, + "license": "MIT" + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve": { + "version": "1.22.11", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.11.tgz", + "integrity": "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-core-module": "^2.16.1", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/resolve-cwd": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", + "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve.exports": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/resolve.exports/-/resolve.exports-2.0.3.tgz", + "integrity": "sha512-OcXjMsGdhL4XnbShKpAcSqPMzQoYkYyhbEaeSko47MjRP9NfEQMhZkXL1DoFlt9LWQn4YttrdnV6X2OiyzBi+A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": 
"sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "dev": true, + "license": "MIT" + }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/stack-utils": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", + "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/string-length": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", + "integrity": "sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "char-regex": "^1.0.2", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", + "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-final-newline": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": 
"sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/test-exclude": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", + "integrity": "sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==", + "dev": true, + "license": "ISC", + "dependencies": { + "@istanbuljs/schema": "^0.1.2", + "glob": "^7.1.4", + "minimatch": "^3.0.4" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/tmpl": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + "node_modules/ts-jest": { + "version": "29.4.5", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.4.5.tgz", + "integrity": "sha512-HO3GyiWn2qvTQA4kTgjDcXiMwYQt68a1Y8+JuLRVpdIzm+UOLSHgl/XqR4c6nzJkq5rOkjc02O2I7P7l/Yof0Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "bs-logger": "^0.2.6", + "fast-json-stable-stringify": "^2.1.0", + "handlebars": "^4.7.8", + "json5": "^2.2.3", + "lodash.memoize": "^4.1.2", + "make-error": "^1.3.6", + "semver": "^7.7.3", + "type-fest": "^4.41.0", + "yargs-parser": "^21.1.1" + }, + "bin": { + "ts-jest": "cli.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "@babel/core": ">=7.0.0-beta.0 <8", + "@jest/transform": "^29.0.0 || ^30.0.0", + "@jest/types": "^29.0.0 || 
^30.0.0", + "babel-jest": "^29.0.0 || ^30.0.0", + "jest": "^29.0.0 || ^30.0.0", + "jest-util": "^29.0.0 || ^30.0.0", + "typescript": ">=4.3 <6" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + }, + "@jest/transform": { + "optional": true + }, + "@jest/types": { + "optional": true + }, + "babel-jest": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "jest-util": { + "optional": true + } + } + }, + "node_modules/ts-jest/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/ts-jest/node_modules/type-fest": { + "version": "4.41.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", + "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/type-detect": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", + "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/type-fest": { + "version": "0.21.3", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/uglify-js": { + "version": "3.19.3", + "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.19.3.tgz", + "integrity": "sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==", + "dev": true, + "license": "BSD-2-Clause", + "optional": true, + "bin": { + "uglifyjs": "bin/uglifyjs" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "license": "MIT" + }, + "node_modules/update-browserslist-db": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.4.tgz", + "integrity": "sha512-q0SPT4xyU84saUX+tomz1WLkxUbuaJnR1xWt17M7fJtEJigJeWUNGUqrauFXsHnqev9y9JTRGwk13tFBuKby4A==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": 
"github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/v8-to-istanbul": { + "version": "9.3.0", + "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz", + "integrity": "sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==", + "dev": true, + "license": "ISC", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.12", + "@types/istanbul-lib-coverage": "^2.0.1", + "convert-source-map": "^2.0.0" + }, + "engines": { + "node": ">=10.12.0" + } + }, + "node_modules/walker": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", + "integrity": "sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "makeerror": "1.0.12" + } + }, + "node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wordwrap": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", + "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, + 
"node_modules/write-file-atomic": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-4.0.2.tgz", + "integrity": "sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==", + "dev": true, + "license": "ISC", + "dependencies": { + "imurmurhash": "^0.1.4", + "signal-exit": "^3.0.7" + }, + "engines": { + "node": "^12.13.0 || ^14.15.0 || >=16.0.0" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true, + "license": "ISC" + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/tests/integration/client-typescript/package.json b/tests/integration/client-typescript/package.json new file mode 100644 index 000000000..e5fe1b8f5 --- /dev/null +++ b/tests/integration/client-typescript/package.json @@ -0,0 +1,18 @@ +{ + "name": "llama-stack-typescript-integration-tests", + "version": "0.0.1", + "private": true, + "description": "TypeScript client integration tests for Llama Stack", + "scripts": { + "test": "node run-tests.js" + }, + "devDependencies": { + "@swc/core": "^1.3.102", + "@swc/jest": "^0.2.29", + "@types/jest": "^29.4.0", + "@types/node": "^20.0.0", + "jest": "^29.4.0", + "ts-jest": "^29.1.0", + "typescript": "^5.0.0" + } +} diff --git a/tests/integration/client-typescript/run-tests.js b/tests/integration/client-typescript/run-tests.js new file mode 100755 index 000000000..93df5d8a0 --- /dev/null +++ b/tests/integration/client-typescript/run-tests.js @@ -0,0 +1,63 @@ +#!/usr/bin/env node +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the terms described in the LICENSE file in +// the root directory of this source tree. + +/** + * Test runner that finds and executes TypeScript tests based on suite/setup mapping. 
+ * Called by integration-tests.sh via npm test. + */ + +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); + +const suite = process.env.LLAMA_STACK_TEST_SUITE; +const setup = process.env.LLAMA_STACK_TEST_SETUP || ''; + +if (!suite) { + console.error('Error: LLAMA_STACK_TEST_SUITE environment variable is required'); + process.exit(1); +} + +// Read suites.json to find matching test files +const suitesPath = path.join(__dirname, 'suites.json'); +if (!fs.existsSync(suitesPath)) { + console.log(`No TypeScript tests configured (${suitesPath} not found)`); + process.exit(0); +} + +const suites = JSON.parse(fs.readFileSync(suitesPath, 'utf-8')); + +// Find matching entry +let testFiles = []; +for (const entry of suites) { + if (entry.suite !== suite) { + continue; + } + const entrySetup = entry.setup || ''; + if (entrySetup && entrySetup !== setup) { + continue; + } + testFiles = entry.files || []; + break; +} + +if (testFiles.length === 0) { + console.log(`No TypeScript integration tests mapped for suite ${suite} (setup ${setup})`); + process.exit(0); +} + +console.log(`Running TypeScript tests for suite ${suite} (setup ${setup}): ${testFiles.join(', ')}`); + +// Run Jest with the mapped test files +try { + execSync(`npx jest --config jest.integration.config.js ${testFiles.join(' ')}`, { + stdio: 'inherit', + cwd: __dirname, + }); +} catch (error) { + process.exit(error.status || 1); +} diff --git a/tests/integration/client-typescript/setup.ts b/tests/integration/client-typescript/setup.ts new file mode 100644 index 000000000..75cabab74 --- /dev/null +++ b/tests/integration/client-typescript/setup.ts @@ -0,0 +1,162 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the terms described in the LICENSE file in +// the root directory of this source tree. + +/** + * Global setup for integration tests. + * This file mimics pytest's fixture system by providing shared test configuration. + */ + +import LlamaStackClient from 'llama-stack-client'; + +/** + * Load test configuration from the Python setup system. + * This reads setup definitions from tests/integration/suites.py via get_setup_env.py. + */ +function loadTestConfig() { + const baseURL = process.env['TEST_API_BASE_URL']; + const setupName = process.env['LLAMA_STACK_TEST_SETUP']; + const textModel = process.env['LLAMA_STACK_TEST_TEXT_MODEL']; + const embeddingModel = process.env['LLAMA_STACK_TEST_EMBEDDING_MODEL']; + + if (!baseURL) { + throw new Error( + 'TEST_API_BASE_URL is required for integration tests. ' + + 'Run tests using: ./scripts/integration-test.sh', + ); + } + + return { + baseURL, + textModel, + embeddingModel, + setupName, + }; +} + +// Read configuration from environment variables (set by scripts/integration-test.sh) +export const TEST_CONFIG = loadTestConfig(); + +// Validate required configuration +beforeAll(() => { + console.log('\n=== Integration Test Configuration ==='); + console.log(`Base URL: ${TEST_CONFIG.baseURL}`); + console.log(`Setup: ${TEST_CONFIG.setupName || 'NOT SET'}`); + console.log( + `Text Model: ${TEST_CONFIG.textModel || 'NOT SET - tests requiring text model will be skipped'}`, + ); + console.log( + `Embedding Model: ${ + TEST_CONFIG.embeddingModel || 'NOT SET - tests requiring embedding model will be skipped' + }`, + ); + console.log('=====================================\n'); +}); + +/** + * Create a client instance for integration tests. 
+ * Mimics pytest's `llama_stack_client` fixture. + * + * @param testId - Test ID to send in X-LlamaStack-Provider-Data header for replay mode. + * Format: "tests/integration/responses/test_basic_responses.py::test_name[params]" + */ +export function createTestClient(testId?: string): LlamaStackClient { + const headers: Record<string, string> = {}; + + // In server mode with replay, send test ID for recording isolation + if (process.env['LLAMA_STACK_TEST_STACK_CONFIG_TYPE'] === 'server' && testId) { + headers['X-LlamaStack-Provider-Data'] = JSON.stringify({ + __test_id: testId, + }); + } + + return new LlamaStackClient({ + baseURL: TEST_CONFIG.baseURL, + timeout: 60000, // 60 seconds + defaultHeaders: headers, + }); +} + +/** + * Skip test if required model is not configured. + * Mimics pytest's `skip_if_no_model` autouse fixture. + */ +export function skipIfNoModel(modelType: 'text' | 'embedding'): typeof test { + const model = modelType === 'text' ? TEST_CONFIG.textModel : TEST_CONFIG.embeddingModel; + + if (!model) { + const envVar = modelType === 'text' ? 'LLAMA_STACK_TEST_TEXT_MODEL' : 'LLAMA_STACK_TEST_EMBEDDING_MODEL'; + const message = `Skipping: ${modelType} model not configured (set ${envVar})`; + console.warn(message); // surface the skip reason + return test.skip.bind(test) as typeof test; + } + + return test; +} + +/** + * Get the configured text model, throwing if not set. + * Use this in tests that absolutely require a text model. + */ +export function requireTextModel(): string { + if (!TEST_CONFIG.textModel) { + throw new Error( + 'LLAMA_STACK_TEST_TEXT_MODEL environment variable is required. ' + + 'Run tests using: ./scripts/integration-test.sh', + ); + } + return TEST_CONFIG.textModel; +} + +/** + * Get the configured embedding model, throwing if not set. + * Use this in tests that absolutely require an embedding model. + */ +export function requireEmbeddingModel(): string { + if (!TEST_CONFIG.embeddingModel) { + throw new Error( + 'LLAMA_STACK_TEST_EMBEDDING_MODEL environment variable is required. ' + + 'Run tests using: ./scripts/integration-test.sh', + ); + } + return TEST_CONFIG.embeddingModel; +} + +/** + * Extracts aggregated text output from a ResponseObject. + * This concatenates all text content from the response's output array. + * + * Copied from llama-stack-client's response-helpers until it's available in a published version. + */ +export function getResponseOutputText(response: any): string { + const pieces: string[] = []; + + for (const output of response.output ??
[]) { + if (!output || output.type !== 'message') { + continue; + } + + const content = output.content; + if (typeof content === 'string') { + pieces.push(content); + continue; + } + + if (!Array.isArray(content)) { + continue; + } + + for (const item of content) { + if (typeof item === 'string') { + pieces.push(item); + continue; + } + if (item && item.type === 'output_text' && 'text' in item && typeof item.text === 'string') { + pieces.push(item.text); + } + } + } + + return pieces.join(''); +} diff --git a/tests/integration/client-typescript/suites.json b/tests/integration/client-typescript/suites.json new file mode 100644 index 000000000..5c5b83058 --- /dev/null +++ b/tests/integration/client-typescript/suites.json @@ -0,0 +1,12 @@ +[ + { + "suite": "responses", + "setup": "gpt", + "files": ["__tests__/responses.test.ts"] + }, + { + "suite": "base", + "setup": "ollama", + "files": ["__tests__/inference.test.ts"] + } +] diff --git a/tests/integration/client-typescript/tsconfig.json b/tests/integration/client-typescript/tsconfig.json new file mode 100644 index 000000000..19b6cdeb1 --- /dev/null +++ b/tests/integration/client-typescript/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "lib": ["ES2022"], + "moduleResolution": "bundler", + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "types": ["jest", "node"] + }, + "include": ["**/*.ts"], + "exclude": ["node_modules"] +} From 1e4e02e6224b5a571dde6a181436c52f1ea8a34f Mon Sep 17 00:00:00 2001 From: Sam El-Borai Date: Wed, 19 Nov 2025 19:09:12 +0100 Subject: [PATCH 49/62] fix(ci): prefix stainless branches with fork author (#4187) # What does this PR do? I believe that should avoid CI issues seen in https://github.com/llamastack/llama-stack/pull/4173. Error we see in Stainless logs: ``` (cannot lock ref 'refs/heads/preview/base/fix/issue-3797-metadata-validation': 'refs/heads/preview/base/fix' exists; cannot create 'refs/heads/preview/base/fix/issue-3797-metadata-validation') ``` The issue is that if a branch `fix` exists, `fix/` cannot be created (that's how git refs work unfortunately...). The fix in this PR is to ensure PRs from forks are using the author as a prefix. In addition we will do changes to the Stainless API to return better error messages here, it should have been a 4xx with a meaningful error, not a 500. And we will likely need to delete the `fix` branch. ## Test Plan --- .github/workflows/stainless-builds.yml | 56 ++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml index 00c5e3df5..a18c70887 100644 --- a/.github/workflows/stainless-builds.yml +++ b/.github/workflows/stainless-builds.yml @@ -59,6 +59,30 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 + # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. 
+ # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" + # For same-repo PRs, this creates "preview/fix/issue-123" + - name: Compute branch names + id: branch-names + run: | + HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" + BASE_REPO="${{ github.repository }}" + BRANCH_NAME="${{ github.event.pull_request.head.ref }}" + + if [ "$HEAD_REPO" != "$BASE_REPO" ]; then + # Fork PR: prefix with fork owner for isolation + FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" + PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}" + else + # Same-repo PR + PREVIEW_BRANCH="preview/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${BRANCH_NAME}" + fi + + echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT + echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT + # This action builds preview SDKs from the OpenAPI spec changes and # posts/updates a comment on the PR with build results and links to the preview. - name: Run preview builds @@ -73,6 +97,8 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} + branch: ${{ steps.branch-names.outputs.preview_branch }} + base_branch: ${{ steps.branch-names.outputs.base_branch }} merge: if: github.event.action == 'closed' && github.event.pull_request.merged == true @@ -90,12 +116,33 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 + # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. + # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" + # For same-repo PRs, this creates "preview/fix/issue-123" + - name: Compute branch names + id: branch-names + run: | + HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" + BASE_REPO="${{ github.repository }}" + BRANCH_NAME="${{ github.event.pull_request.head.ref }}" + + if [ "$HEAD_REPO" != "$BASE_REPO" ]; then + # Fork PR: prefix with fork owner for isolation + FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" + MERGE_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" + else + # Same-repo PR + MERGE_BRANCH="preview/${BRANCH_NAME}" + fi + + echo "merge_branch=${MERGE_BRANCH}" >> $GITHUB_OUTPUT + # Note that this only merges in changes that happened on the last build on - # preview/${{ github.head_ref }}. It's possible that there are OAS/config - # changes that haven't been built, if the preview-sdk job didn't finish + # the computed preview branch. It's possible that there are OAS/config + # changes that haven't been built, if the preview job didn't finish # before this step starts. In theory we want to wait for all builds - # against preview/${{ github.head_ref }} to complete, but assuming that - # the preview-sdk job happens before the PR merge, it should be fine. + # against the preview branch to complete, but assuming that + # the preview job happens before the PR merge, it should be fine. 
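+      # Note for the merge step below: git refs are path-like, so an existing
+      # branch named "fix" blocks creating "fix/issue-123" ("cannot lock ref").
+      # Prefixing fork branches with the owner's login sidesteps that collision
+      # and keeps contributors' preview branches isolated from each other.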
- name: Run merge build uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0 with: @@ -108,3 +155,4 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} + merge_branch: ${{ steps.branch-names.outputs.merge_branch }} From f18870a22165b8bf3bac297df80a246c130cfb57 Mon Sep 17 00:00:00 2001 From: Roy Belio <34023431+r-bit-rry@users.noreply.github.com> Date: Wed, 19 Nov 2025 20:16:34 +0200 Subject: [PATCH 50/62] fix: Pydantic validation error with list-type metadata in vector search (#3797) (#4173) # Fix for Issue #3797 ## Problem Vector store search failed with Pydantic ValidationError when chunk metadata contained list-type values. **Error:** ``` ValidationError: 3 validation errors for VectorStoreSearchResponse attributes.tags.str: Input should be a valid string attributes.tags.float: Input should be a valid number attributes.tags.bool: Input should be a valid boolean ``` **Root Cause:** - `Chunk.metadata` accepts `dict[str, Any]` (any type allowed) - `VectorStoreSearchResponse.attributes` requires `dict[str, str | float | bool]` (primitives only) - Direct assignment at line 641 caused validation failure for non-primitive types ## Solution Added utility function to filter metadata to primitive types before creating search response. ## Impact **Fixed:** - Vector search works with list metadata (e.g., `tags: ["transformers", "gpu"]`) - Lists become searchable as comma-separated strings - No ValidationError on search responses **Preserved:** - Full metadata still available in `VectorStoreContent.metadata` - No API schema changes - Backward compatible with existing primitive metadata **Affected:** All vector store providers using `OpenAIVectorStoreMixin`: FAISS, Chroma, Qdrant, Milvus, Weaviate, PGVector, SQLite-vec ## Testing tests/unit/providers/vector_io/test_vector_utils.py::test_sanitize_metadata_for_attributes --------- Co-authored-by: Ashwin Bharambe Co-authored-by: Francisco Arceo --- client-sdks/stainless/openapi.yml | 14 +++- docs/static/deprecated-llama-stack-spec.yaml | 14 +++- .../static/experimental-llama-stack-spec.yaml | 14 +++- docs/static/llama-stack-spec.yaml | 14 +++- docs/static/stainless-llama-stack-spec.yaml | 14 +++- src/llama_stack_api/vector_io.py | 69 ++++++++++++++++- .../providers/vector_io/test_vector_utils.py | 76 ++++++++++++++++++- 7 files changed, 207 insertions(+), 8 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index a6ebc868c..9269b7e39 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -9862,9 +9862,21 @@ components: title: Object default: vector_store.file attributes: - additionalProperties: true + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 type: object + maxProperties: 16 title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. 
+ x-oaiTypeLabel: map chunking_strategy: oneOf: - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 207af8926..cf9bd14c4 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -6705,9 +6705,21 @@ components: title: Object default: vector_store.file attributes: - additionalProperties: true + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 type: object + maxProperties: 16 title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. + x-oaiTypeLabel: map chunking_strategy: oneOf: - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index f81a93d33..18ce75562 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -6061,9 +6061,21 @@ components: title: Object default: vector_store.file attributes: - additionalProperties: true + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 type: object + maxProperties: 16 title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. + x-oaiTypeLabel: map chunking_strategy: oneOf: - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 816f3d0fb..9f7b2ed64 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -8883,9 +8883,21 @@ components: title: Object default: vector_store.file attributes: - additionalProperties: true + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 type: object + maxProperties: 16 title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. 
+ x-oaiTypeLabel: map chunking_strategy: oneOf: - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index a6ebc868c..9269b7e39 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -9862,9 +9862,21 @@ components: title: Object default: vector_store.file attributes: - additionalProperties: true + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 type: object + maxProperties: 16 title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. + x-oaiTypeLabel: map chunking_strategy: oneOf: - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py index bfad644cc..135468d19 100644 --- a/src/llama_stack_api/vector_io.py +++ b/src/llama_stack_api/vector_io.py @@ -11,7 +11,7 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body, Query -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.inference import InterleavedContent @@ -372,6 +372,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[ register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus") +# VectorStoreFileAttributes type with OpenAPI constraints +VectorStoreFileAttributes = Annotated[ + dict[str, Annotated[str, Field(max_length=512)] | float | bool], + Field( + max_length=16, + json_schema_extra={ + "propertyNames": {"type": "string", "maxLength": 64}, + "x-oaiTypeLabel": "map", + }, + description=( + "Set of 16 key-value pairs that can be attached to an object. This can be " + "useful for storing additional information about the object in a structured " + "format, and querying for objects via API or the dashboard. Keys are strings " + "with a maximum length of 64 characters. Values are strings with a maximum " + "length of 512 characters, booleans, or numbers." + ), + ), +] + + +def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]: + """ + Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only). 
+ + Converts dict[str, Any] to dict[str, str | float | bool]: + - Preserves: str (truncated to 512 chars), bool, int/float (as float) + - Converts: list -> comma-separated string + - Filters: dict, None, other types + - Enforces: max 16 properties, max 64 char keys, max 512 char string values + """ + if not metadata: + return {} + + sanitized: dict[str, str | float | bool] = {} + for key, value in metadata.items(): + # Enforce max 16 properties + if len(sanitized) >= 16: + break + + # Enforce max 64 char keys + if len(key) > 64: + continue + + # Convert to supported primitive types + if isinstance(value, bool): + sanitized[key] = value + elif isinstance(value, int | float): + sanitized[key] = float(value) + elif isinstance(value, str): + # Enforce max 512 char string values + sanitized[key] = value[:512] if len(value) > 512 else value + elif isinstance(value, list): + # Convert lists to comma-separated strings (max 512 chars) + list_str = ", ".join(str(item) for item in value) + sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str + + return sanitized + + @json_schema_type class VectorStoreFileObject(BaseModel): """OpenAI Vector Store File object. @@ -389,7 +448,7 @@ class VectorStoreFileObject(BaseModel): id: str object: str = "vector_store.file" - attributes: dict[str, Any] = Field(default_factory=dict) + attributes: VectorStoreFileAttributes = Field(default_factory=dict) chunking_strategy: VectorStoreChunkingStrategy created_at: int last_error: VectorStoreFileLastError | None = None @@ -397,6 +456,12 @@ class VectorStoreFileObject(BaseModel): usage_bytes: int = 0 vector_store_id: str + @field_validator("attributes", mode="before") + @classmethod + def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]: + """Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec.""" + return _sanitize_vector_store_attributes(v) + @json_schema_type class VectorStoreListFilesResponse(BaseModel): diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py index 7f6b4af79..3e6b2971f 100644 --- a/tests/unit/providers/vector_io/test_vector_utils.py +++ b/tests/unit/providers/vector_io/test_vector_utils.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id -from llama_stack_api import Chunk, ChunkMetadata +from llama_stack_api import Chunk, ChunkMetadata, VectorStoreFileObject # This test is a unit test for the chunk_utils.py helpers. This should only contain # tests which are specific to this file. 
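# A minimal sketch of the sanitization behavior exercised by the tests below,
# assuming the patched llama_stack_api package is importable (the IDs used here
# are illustrative, not taken from the patch):
from llama_stack_api import VectorStoreFileObject
from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto

# The _validate_attributes hook added above runs before Pydantic validation,
# so the list is coerced to a comma-separated string and the nested dict is
# dropped instead of failing validation.
file_obj = VectorStoreFileObject(
    id="file-demo",
    attributes={
        "tags": ["rag", "gpu"],  # list -> "rag, gpu"
        "score": 0.7,            # float preserved
        "count": 3,              # int -> 3.0
        "nested": {"k": "v"},    # dict filtered out
    },
    chunking_strategy=VectorStoreChunkingStrategyAuto(),
    created_at=0,
    status="completed",
    vector_store_id="vs-demo",
)
assert file_obj.attributes == {"tags": "rag, gpu", "score": 0.7, "count": 3.0}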
More general (API-level) tests should be placed in @@ -78,3 +78,4 @@ def test_chunk_serialization(): serialized_chunk = chunk.model_dump() assert serialized_chunk["chunk_id"] == "test-chunk-id" assert "chunk_id" in serialized_chunk + + +def test_vector_store_file_object_attributes_validation(): + """Test VectorStoreFileObject validates and sanitizes attributes at input boundary.""" + # Test with metadata containing lists, nested dicts, and primitives + from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto + + file_obj = VectorStoreFileObject( + id="file-123", + attributes={ + "tags": ["transformers", "h100-compatible", "region:us"], # List -> string + "model_name": "granite-3.3-8b", # String preserved + "score": 0.95, # Float preserved + "active": True, # Bool preserved + "count": 42, # Int -> float + "nested": {"key": "value"}, # Dict filtered out + }, + chunking_strategy=VectorStoreChunkingStrategyAuto(), + created_at=1234567890, + status="completed", + vector_store_id="vs-123", + ) + + # Lists converted to comma-separated strings + assert file_obj.attributes["tags"] == "transformers, h100-compatible, region:us" + # Primitives preserved + assert file_obj.attributes["model_name"] == "granite-3.3-8b" + assert file_obj.attributes["score"] == 0.95 + assert file_obj.attributes["active"] is True + assert file_obj.attributes["count"] == 42.0 # int -> float + # Complex types filtered out + assert "nested" not in file_obj.attributes + + +def test_vector_store_file_object_attributes_constraints(): + """Test VectorStoreFileObject enforces OpenAPI constraints on attributes.""" + from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto + + # Test max 16 properties + many_attrs = {f"key{i}": f"value{i}" for i in range(20)} + file_obj = VectorStoreFileObject( + id="file-123", + attributes=many_attrs, + chunking_strategy=VectorStoreChunkingStrategyAuto(), + created_at=1234567890, + status="completed", + vector_store_id="vs-123", + ) + assert len(file_obj.attributes) == 16 # Max 16 properties + + # Test max 64 char keys are filtered + long_key_attrs = {"a" * 65: "value", "valid_key": "value"} + file_obj = VectorStoreFileObject( + id="file-124", + attributes=long_key_attrs, + chunking_strategy=VectorStoreChunkingStrategyAuto(), + created_at=1234567890, + status="completed", + vector_store_id="vs-123", + ) + assert "a" * 65 not in file_obj.attributes + assert "valid_key" in file_obj.attributes + + # Test max 512 char string values are truncated + long_value_attrs = {"key": "x" * 600} + file_obj = VectorStoreFileObject( + id="file-125", + attributes=long_value_attrs, + chunking_strategy=VectorStoreChunkingStrategyAuto(), + created_at=1234567890, + status="completed", + vector_store_id="vs-123", + ) + assert len(file_obj.attributes["key"]) == 512 From 72ea95e2e006645cb30bdf736859b23ae499749b Mon Sep 17 00:00:00 2001 From: Shabana Baig <43451943+s-akhtar-baig@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:27:56 -0500 Subject: [PATCH 51/62] fix: Fix max_tool_calls for openai provider and add integration tests for the max_tool_calls feat (#4190) # Problem OpenAI gpt-4 returned an error when built-in and mcp calls were skipped due to the max_tool_calls parameter. The following is from the server log: ``` RuntimeError: OpenAI response failed: Error code: 400 - {'error': {'message': "An assistant message with 'tool_calls' must be followed by tool messages responding to each 'tool_call_id'.
The following tool_call_ids did not have response messages: call_Yi9V1QNpN73dJCAgP2Arcjej", 'type': 'invalid_request_error', 'param': 'messages', 'code': None}} ``` # What does this PR do? - Fixes error returned by openai/gpt when calls were skipped due to max_tool_calls. We now return a tool message that explicitly mentions that the call is skipped. - Adds integration tests as a follow-up to PR#[4062](https://github.com/llamastack/llama-stack/pull/4062) Part 2 for issue #[3563](https://github.com/llamastack/llama-stack/issues/3563) ## Test Plan - Added integration tests - Added new recordings --------- Co-authored-by: Ashwin Bharambe --- .../meta_reference/responses/streaming.py | 11 +- .../agents/test_openai_responses.py | 166 -- ...ddde3553f0db5d5a673146d8bb99c072e77cd.json | 773 ++++++++ ...c43af318468eb4ef84fd4008ebb40824b7e86.json | 593 ++++++ ...ddbcf60a1fedd85c501850b9f7e759443809f.json | 773 ++++++++ ...c2a685da5e743820a68de74640451f0072184.json | 1099 +++++++++++ ...6700e308ebbe9389bc5a1da8f4840fc9031ef.json | 1099 +++++++++++ ...04cf049a522bd106852b6d09e9baf41df88d3.json | 1634 ++++++++++++++++ ...de4d4f415f237e22b2b6983677a1e1319a0d3.json | 593 ++++++ ...d0b947d35c870ff825f06d8997a84dca1f5bf.json | 1661 +++++++++++++++++ .../responses/test_tool_responses.py | 152 ++ 11 files changed, 8386 insertions(+), 168 deletions(-) create mode 100644 tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json create mode 100644 tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json create mode 100644 tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json create mode 100644 tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json create mode 100644 tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json create mode 100644 tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json create mode 100644 tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json create mode 100644 tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 0ef74f1f1..9e901d88b 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -66,6 +66,7 @@ from llama_stack_api import ( OpenAIResponseUsage, OpenAIResponseUsageInputTokensDetails, OpenAIResponseUsageOutputTokensDetails, + OpenAIToolMessageParam, Safety, WebSearchToolTypes, ) @@ -906,10 +907,16 @@ class StreamingResponseOrchestrator: """Coordinate execution of both function and non-function tool calls.""" # Execute non-function tool calls for tool_call in non_function_tool_calls: - # Check if total calls made to built-in and mcp tools exceed max_tool_calls + # if total calls made to built-in and mcp tools exceed max_tool_calls + # then create a tool response message indicating the call was skipped if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls: logger.info(f"Ignoring built-in and mcp tool call since reached the 
limit of {self.max_tool_calls=}.") - break + skipped_call_message = OpenAIToolMessageParam( + content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.", + tool_call_id=tool_call.id, + ) + next_turn_messages.append(skipped_call_message) + continue # Find the item_id for this tool call matching_item_id = None diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 057cee774..d413d5201 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode # Verify instructions from previous response was not carried over to the next response assert response_with_instructions2.instructions == instructions2 - - -@pytest.mark.skip(reason="Tool calling is not reliable.") -def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id): - """Test handling of max_tool_calls with function tools in responses.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("OpenAI responses are not supported when testing with library client yet.") - - client = openai_client - max_tool_calls = 1 - - tools = [ - { - "type": "function", - "name": "get_weather", - "description": "Get weather information for a specified location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city name (e.g., 'New York', 'London')", - }, - }, - }, - }, - { - "type": "function", - "name": "get_time", - "description": "Get current time for a specified location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city name (e.g., 'New York', 'London')", - }, - }, - }, - }, - ] - - # First create a response that triggers function tools - response = client.responses.create( - model=text_model_id, - input="Can you tell me the weather in Paris and the current time?", - tools=tools, - stream=False, - max_tool_calls=max_tool_calls, - ) - - # Verify we got two function calls and that the max_tool_calls do not affect function tools - assert len(response.output) == 2 - assert response.output[0].type == "function_call" - assert response.output[0].name == "get_weather" - assert response.output[0].status == "completed" - assert response.output[1].type == "function_call" - assert response.output[1].name == "get_time" - assert response.output[0].status == "completed" - - # Verify we have a valid max_tool_calls field - assert response.max_tool_calls == max_tool_calls - - -def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id): - """Test handling of invalid max_tool_calls in responses.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("OpenAI responses are not supported when testing with library client yet.") - - client = openai_client - - input = "Search for today's top technology news." - invalid_max_tool_calls = 0 - tools = [ - {"type": "web_search"}, - ] - - # Create a response with an invalid max_tool_calls value i.e. 
0 - # Handle ValueError from LLS and BadRequestError from OpenAI client - with pytest.raises((ValueError, BadRequestError)) as excinfo: - client.responses.create( - model=text_model_id, - input=input, - tools=tools, - stream=False, - max_tool_calls=invalid_max_tool_calls, - ) - - error_message = str(excinfo.value) - assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, ( - f"Expected error message about invalid max_tool_calls, got: {error_message}" - ) - - -def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id): - """Test handling of max_tool_calls with built-in tools in responses.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("OpenAI responses are not supported when testing with library client yet.") - - client = openai_client - - input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls." - max_tool_calls = [1, 5] - tools = [ - {"type": "web_search"}, - ] - - # First create a response that triggers web_search tools without max_tool_calls - response = client.responses.create( - model=text_model_id, - input=input, - tools=tools, - stream=False, - ) - - # Verify we got two web search calls followed by a message - assert len(response.output) == 3 - assert response.output[0].type == "web_search_call" - assert response.output[0].status == "completed" - assert response.output[1].type == "web_search_call" - assert response.output[1].status == "completed" - assert response.output[2].type == "message" - assert response.output[2].status == "completed" - assert response.output[2].role == "assistant" - - # Next create a response that triggers web_search tools with max_tool_calls set to 1 - response_2 = client.responses.create( - model=text_model_id, - input=input, - tools=tools, - stream=False, - max_tool_calls=max_tool_calls[0], - ) - - # Verify we got one web search tool call followed by a message - assert len(response_2.output) == 2 - assert response_2.output[0].type == "web_search_call" - assert response_2.output[0].status == "completed" - assert response_2.output[1].type == "message" - assert response_2.output[1].status == "completed" - assert response_2.output[1].role == "assistant" - - # Verify we have a valid max_tool_calls field - assert response_2.max_tool_calls == max_tool_calls[0] - - # Finally create a response that triggers web_search tools with max_tool_calls set to 5 - response_3 = client.responses.create( - model=text_model_id, - input=input, - tools=tools, - stream=False, - max_tool_calls=max_tool_calls[1], - ) - - # Verify we got two web search calls followed by a message - assert len(response_3.output) == 3 - assert response_3.output[0].type == "web_search_call" - assert response_3.output[0].status == "completed" - assert response_3.output[1].type == "web_search_call" - assert response_3.output[1].status == "completed" - assert response_3.output[2].type == "message" - assert response_3.output[2].status == "completed" - assert response_3.output[2].role == "assistant" - - # Verify we have a valid max_tool_calls field - assert response_3.max_tool_calls == max_tool_calls[1] diff --git a/tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json b/tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json new file mode 100644 index 000000000..4418331b0 --- /dev/null +++ 
b/tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json @@ -0,0 +1,773 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1V9w3bXnppL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YEsj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"ex", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "perim", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "ent_na", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": 
"default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "me\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "U" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " \"boi", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "ling_p", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "oint", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ha" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "d5D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + 
"tool_calls": [ + { + "index": 1, + "id": "call_HELkyZOm2fzLx2CeTH3bEcS2", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "0LbsjDcKz6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "{\"us", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "c" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "ernam", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "e\": \"c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "7C0WFn181I3y3l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "harl", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "wf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "ie\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "r" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "FAci" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1997dc007d20", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 51, + "prompt_tokens": 393, + "total_tokens": 444, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "6xgpRRdKjviPT" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json b/tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json new file mode 100644 index 000000000..3bec72d95 --- /dev/null +++ b/tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json @@ -0,0 +1,593 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Can you tell me the weather in Paris and the current time?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "type": "function", + "name": "get_weather", + "description": "Get weather information for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')" + } + } + }, + "strict": null + } + }, + { + "type": "function", + "function": { + "type": "function", + "name": "get_time", + "description": "Get current time for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')" + } + } + }, + "strict": null + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "QmTXstGvpa8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "call_HJMoLtHXfCzhlMQOfqIKt0n3", + "function": { + "arguments": "", + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "iFjmkK23KL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"lo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "catio", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "L" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + 
"delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "n\": \"P", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "THa6gWbrWhVmZ6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aris", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "eL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "jng" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": "call_vGKvTKZM7aALMaUw3Jas7lRg", + "function": { + "arguments": "", + "name": "get_time" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "LSailgMcgSl54" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "{\"lo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "catio", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "n\": \"P", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "0engr6vRvqXTEP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "aris", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "Pe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "LU9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "kD7d" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-463ab0e2f291", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": { + "completion_tokens": 44, + "prompt_tokens": 110, + "total_tokens": 154, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "R4ICoxqTqj7ZY" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git 
a/tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json b/tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json new file mode 100644 index 000000000..ee32a4396 --- /dev/null +++ b/tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json @@ -0,0 +1,773 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "N5OTLR9CfmU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "call_z8P1RQv54BLxyMlRdMFkcCGd", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "3EKK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"ex", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "R" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "perim", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "ent_na", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": 
"default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "me\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": " \"boi", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "ling_p", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "oint", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "pw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Gfk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + 
"tool_calls": [ + { + "index": 1, + "id": "call_I5tcLgyMADoVwLKDj9HkTCs5", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Yp7IueDs5V" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "{\"us", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "ernam", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "X" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "e\": \"c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "2oif8BwVnTCnAF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "harl", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "hv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "ie\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "C" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ctjO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b218af7fa066", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 51, + "prompt_tokens": 393, + "total_tokens": 444, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "fclbZeBSSKN4C" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json b/tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json new file mode 100644 index 000000000..2f5d2364f --- /dev/null +++ b/tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json @@ -0,0 +1,1099 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\": \"boiling_point\"}" + } + }, + { + "index": 1, + "id": "call_HELkyZOm2fzLx2CeTH3bEcS2", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\": \"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_HELkyZOm2fzLx2CeTH3bEcS2", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. 
This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YYi7jfwMArDwjF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "02OX5OI6tENcr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "4WNc0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "tKtJ1sl5pfaDr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Hvj1aWM1rpv8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 
0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9E9CvQfqolGi9S" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "j4WB9GjVD9jcfN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "TTDWSqM29LF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "AjjxQybBbe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1gVblRiURtILOET" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "0R3NJvfpXy2dP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "A7ulc3isZRh1Wy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "exp", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "FPq6iOQwJS1aQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Kc20HZgwXltY5rS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "004", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "2FCOJr6gSDviM" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "`,", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "zcC44JB9JLv8DJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YkHz4dmGI8Ip" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "WU1FWVwHa8kT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "F89Whppjswq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "WSOnxHfHCWTqS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "xdc4FO9TTNKE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "815WDeN0y91Hke" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "xp6WP0YmWjNZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "apUUpE3jkpxjm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "TfCA46aEfur7ddv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "4q5btS7EmyGo4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "a5UVTkIvEXtjbH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "UGU1lPYHNno0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "4axBUdqWraTmuNf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "111", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ZtMOpwGI78JEH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "11", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "LqPjHcx2BmtLO1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": "`.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "l5q2xqEWQx4dA4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "sM6qZWT3Vp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b2b590332535", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 32, + "prompt_tokens": 465, + "total_tokens": 497, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "Nr5ToBPpxyZu4" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json b/tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json new file mode 100644 index 000000000..3c9321759 --- /dev/null +++ b/tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json @@ -0,0 +1,1099 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_z8P1RQv54BLxyMlRdMFkcCGd", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\": \"boiling_point\"}" + } + }, + { + "index": 1, + "id": "call_I5tcLgyMADoVwLKDj9HkTCs5", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\": \"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_z8P1RQv54BLxyMlRdMFkcCGd", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_I5tcLgyMADoVwLKDj9HkTCs5", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. 
This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "wwHFAiwvH4WszR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9715Kiw8g6FeU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "f3RUP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "uTou0sZw0Trqr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "O3FUhiRX4t3O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 
0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "8Row2VeWyXlavX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "R6KU5Aed2Y4hdt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "aXOqmJlIAIp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "AEyQ67P1E9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "pxs1ElabWHWYTsE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "f4fvZlQAsoFLb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "XIUUCRzVlWEjdW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "exp", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "x2dM9CVkT0ICQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Ls8dfHOXPeHjdGE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "004", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "RF1hpcOB964EM" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "`,", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "QnLWon1Lh1bPrb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "0OHZT5bnbdwa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "jtbU7bWjfj72" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "nCopvKj1JIE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "2ZDuFZoCixweF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "u3QmR0zYiExg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "z6tGgyH3Gw667d" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "HalCDTgB5QRV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "5UJBpMTsZMjVF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "p8zU7xEpcUR63Lh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "t0fKxlCyUxaFU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "lRSEHqi9mVmVZJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "8C6DeNABBjpJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "L4qXmW7bonqcf97" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "111", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "zje3cRhC3fzKb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "11", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "NgeVi1nYcUbkmN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": "`.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "d83dlilKTeA1RE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "HnPRpNWz4n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b376e47c1857", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 32, + "prompt_tokens": 465, + "total_tokens": 497, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "sfrloH58kmZpA" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json b/tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json new file mode 100644 index 000000000..821bd20c4 --- /dev/null +++ b/tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json @@ -0,0 +1,1634 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\": \"boiling_point\"}" + } + }, + { + "index": 1, + "id": "call_HELkyZOm2fzLx2CeTH3bEcS2", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\": \"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_HELkyZOm2fzLx2CeTH3bEcS2", + "content": "Tool call skipped: maximum tool calls limit (1) reached." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. 
This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9zm2knPUrQf9Ti" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "dBZWt7n0cY28K" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "gBkUe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "DK27AidkjJEUs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "BvRS3fe55saU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 
0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Q30TpKRJ8sqbaj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "uZIcYxencsPVq7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "OTlywqpO2gu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1D39HJt78o" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "z9q3XLiA1zUj69i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YilL3DwdzhGNE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "yLvB3LVIF9yqTB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "exp", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "aQ2ZgA6wBrzgb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "0jzpzruxw3CNxO3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "004", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Wl5Eu8yWUoj2V" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "`.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "F3a7FpN1N5MOoL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " However", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "oC3Sc1Oj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "dR3KxirqoL6RMvN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "HDIUF9MxNvDNC8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " wasn't", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "jvYMbj7Jb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " able", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "wA25F90roLY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1kP6AeTeGmGNU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "8zixGSMc9fiH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "UCSCTgIKkLiT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1hHm53qitSi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + 
"id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "N3NBeCvE43ZRW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ul7bMYRpL04n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ABgwNSe6WHqE9N" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "6q5tAeJOMEC8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "gxcccAWJYWckn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "qpqi3k54AaZDnNH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " due", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + 
"object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "OB5oYuchm2uE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "MKHpNGKsdWpLO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "zYt4J00NPy69fJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Z0kM0bozww8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "qbQA28Mr3PO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " limit", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ZzevZnpsYj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "QBno7Vj0QhMrSjO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + 
"choices": [ + { + "delta": { + "content": " Please", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "hEj0RemlE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "xN8xRqzcxXCR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "0LxJ9leKvCunj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " know", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "KoHcgiBEVc6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " if", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "eT2hCjpvISlxh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9LJdcoWEzgMP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " would", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", 
+ "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "bxChZ0IYYP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "oU5UBQRKEpI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "HQHzzykuhNV7v" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YJ86yXpqctfF5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " attempt", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ToTM0n5O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "SateSvqBggb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": " again", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "APRnnp4Qce" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + 
"content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Xe9yNJcVnFP4PZl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ZH7NR5wSoI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c1b953d78e04", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 52, + "prompt_tokens": 474, + "total_tokens": 526, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "2P0uXrABC0X8d" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json b/tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json new file mode 100644 index 000000000..450d84176 --- /dev/null +++ b/tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json @@ -0,0 +1,593 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Can you tell me the weather in Paris and the current time?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "type": "function", + "name": "get_weather", + "description": "Get weather information for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')" + } + } + }, + "strict": null + } + }, + { + "type": "function", + "function": { + "type": "function", + "name": "get_time", + "description": "Get current time for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')" + } + } + }, + "strict": null + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "iUduPiCYBRb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": "call_Wv3G8aEQOJLNXGRaK3hAWzq3", + "function": { + "arguments": "", + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "cqZKgzm65y" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"lo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "catio", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "L" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + 
"delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "n\": \"P", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "zbBLzavvnEdLz0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aris", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "Gj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "LQo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": "call_8xkOmOgJpV77n5W2dSx6ytW6", + "function": { + "arguments": "", + "name": "get_time" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "eltoncGlxI8Go" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "{\"lo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "S" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "catio", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "N" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "n\": \"P", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "2bTn1MaAXYFoVK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "aris", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "VF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 1, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "BHi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": null, + "obfuscation": "WaYG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d073f434d28c", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_b1442291a8", + "usage": { + "completion_tokens": 44, + "prompt_tokens": 110, + "total_tokens": 154, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "aevj6ZWLqfCK6" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git 
a/tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json b/tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json new file mode 100644 index 000000000..089242af3 --- /dev/null +++ b/tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json @@ -0,0 +1,1661 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_z8P1RQv54BLxyMlRdMFkcCGd", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\": \"boiling_point\"}" + } + }, + { + "index": 1, + "id": "call_I5tcLgyMADoVwLKDj9HkTCs5", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\": \"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_z8P1RQv54BLxyMlRdMFkcCGd", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_I5tcLgyMADoVwLKDj9HkTCs5", + "content": "Tool call skipped: maximum tool calls limit (1) reached." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. 
This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "uoj10MYhhjCsjQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "RbrwfJ20BVqRi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "88xHU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "lXhzWF230RZCL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "McIrBR2XVfyS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "7SiItrYff13YKr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "pf232bD4VeXdXc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "z0kyzhP7ioh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "3TUkmyiT28" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "kFAkj6BHwM6YKZQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "fiRWSM9LNpP4J" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": 
[ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "VRPBkgW9PrA6C7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "exp", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "YqSi9vVuexh3e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "_", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "y64suQvx1Nfp8Pj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "004", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "kouF1KXaF3fSv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "`.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Ju1xHmwme71tPA" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " However", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "TZuAhRJ8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ikVKxLAdOhUPHHa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "pntThOzs2GzlYs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " couldn't", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "v4ihoTx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " retrieve", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "476NjPo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "AFDAUQw3ezkM" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ztweLiyDuwu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " ID", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "q575s9DLRlXDL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + 
"content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "oEoKwHu8H1FD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "KOgPjHTbZYg83A" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "PmTsVhsBBtRV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "hkXsP7qhxNrQ0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "C9RtrovVHvrH33B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " at", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "fhJHhlmbEWrnY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " this", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "pvYlADlLGnc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " time", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "N787ynNkyIU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " due", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "lkX5gCjexTSI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ecopEBh7Ckmai" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "Nf1X9c8Z4TduoA" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "MtnVKdm0UnR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "ExJ8aBPckoF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " limitation", + 
"function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "jE7bT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "AaaLnYdPLucETYH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " Please", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "cPsBAfFXF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " let", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "nGUo5AX3lQpP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "shpHT1JYFdHrS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " know", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "RG8m7peAEPl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " if", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": 
"fp_c98e05ca17", + "usage": null, + "obfuscation": "i4q8OeCvU08qi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " there's", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "lXBbPXWn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " anything", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "EyZRgWl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " else", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "h87NDUy4I75" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "1CJqPAnvuBVEXV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "9Ava6GiwMlu5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " assist", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "fl9TQoNlV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + 
"refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "4PwMuL1TPPvZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "XeIvTn2s7ap" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "U93F4p2ENgwWFKN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": null, + "obfuscation": "3P0Kp8n8xH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e3e2e64c57bb", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_c98e05ca17", + "usage": { + "completion_tokens": 53, + "prompt_tokens": 474, + "total_tokens": 527, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "zjt0xUw7Sz8p9" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 742d45f8b..49bcd050b 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -600,3 +600,155 @@ def test_response_streaming_multi_turn_tool_execution(responses_client, text_mod assert expected_output.lower() in final_response.output_text.lower(), ( f"Expected '{expected_output}' to appear in response: {final_response.output_text}" ) + + +def test_max_tool_calls_with_function_tools(responses_client, text_model_id): + """Test handling of max_tool_calls with function tools in responses.""" + + max_tool_calls = 1 + tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get weather information for a specified location", + "parameters": { + 
"type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')", + }, + }, + }, + }, + { + "type": "function", + "name": "get_time", + "description": "Get current time for a specified location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name (e.g., 'New York', 'London')", + }, + }, + }, + }, + ] + + response = responses_client.responses.create( + model=text_model_id, + input="Can you tell me the weather in Paris and the current time?", + tools=tools, + stream=False, + max_tool_calls=max_tool_calls, + ) + + # Verify we got two function calls and that the max_tool_calls does not affect function tools + assert len(response.output) == 2 + assert response.output[0].type == "function_call" + assert response.output[0].name == "get_weather" + assert response.output[0].status == "completed" + assert response.output[1].type == "function_call" + assert response.output[1].name == "get_time" + assert response.output[1].status == "completed" + + # Verify we have a valid max_tool_calls field + assert response.max_tool_calls == max_tool_calls + + +def test_max_tool_calls_invalid(responses_client, text_model_id): + """Test handling of invalid max_tool_calls in responses.""" + + input = "Search for today's top technology news." + invalid_max_tool_calls = 0 + tools = [ + {"type": "web_search"}, + ] + + # Create a response with an invalid max_tool_calls value i.e. 0 + # Handle ValueError from LLS and BadRequestError from OpenAI client + with pytest.raises((ValueError, llama_stack_client.BadRequestError, openai.BadRequestError)) as excinfo: + responses_client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=invalid_max_tool_calls, + ) + + error_message = str(excinfo.value) + assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, ( + f"Expected error message about invalid max_tool_calls, got: {error_message}" + ) + + +def test_max_tool_calls_with_mcp_tools(responses_client, text_model_id): + """Test handling of max_tool_calls with mcp tools in responses.""" + + with make_mcp_server(tools=dependency_tools()) as mcp_server_info: + input = "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'" + max_tool_calls = [1, 5] + tools = [ + {"type": "mcp", "server_label": "localmcp", "server_url": mcp_server_info["server_url"]}, + ] + + # First create a response that triggers mcp tools without max_tool_calls + response = responses_client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + ) + + # Verify we got two mcp tool calls followed by a message + assert len(response.output) == 4 + mcp_list_tools = [output for output in response.output if output.type == "mcp_list_tools"] + mcp_calls = [output for output in response.output if output.type == "mcp_call"] + message_outputs = [output for output in response.output if output.type == "message"] + assert len(mcp_list_tools) == 1 + assert len(mcp_calls) == 2, f"Expected two mcp calls, got {len(mcp_calls)}" + assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}" + + # Next create a response that triggers mcp tools with max_tool_calls set to 1 + response_2 = responses_client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=max_tool_calls[0], + ) + + # Verify we got one mcp tool call followed 
by a message + assert len(response_2.output) == 3 + mcp_list_tools = [output for output in response_2.output if output.type == "mcp_list_tools"] + mcp_calls = [output for output in response_2.output if output.type == "mcp_call"] + message_outputs = [output for output in response_2.output if output.type == "message"] + assert len(mcp_list_tools) == 1 + assert len(mcp_calls) == 1, f"Expected one mcp call, got {len(mcp_calls)}" + assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}" + + # Verify we have a valid max_tool_calls field + assert response_2.max_tool_calls == max_tool_calls[0] + + # Finally create a response that triggers mcp tools with max_tool_calls set to 5 + response_3 = responses_client.responses.create( + model=text_model_id, + input=input, + tools=tools, + stream=False, + max_tool_calls=max_tool_calls[1], + ) + + # Verify we got two mcp tool calls followed by a message + assert len(response_3.output) == 4 + mcp_list_tools = [output for output in response_3.output if output.type == "mcp_list_tools"] + mcp_calls = [output for output in response_3.output if output.type == "mcp_call"] + message_outputs = [output for output in response_3.output if output.type == "message"] + assert len(mcp_list_tools) == 1 + assert len(mcp_calls) == 2, f"Expected two mcp calls, got {len(mcp_calls)}" + assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}" + + # Verify we have a valid max_tool_calls field + assert response_3.max_tool_calls == max_tool_calls[1] From 49d6ef8a7032fb40ea0a2504336364fb7acd4712 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 11:01:52 -0800 Subject: [PATCH 52/62] fix(docs): fix glob vulnerability (#4193) add npm override so docs workspace resolves glob@10.5+ --- docs/package-lock.json | 122 +++++++---------------------------------- docs/package.json | 3 + 2 files changed, 22 insertions(+), 103 deletions(-) diff --git a/docs/package-lock.json b/docs/package-lock.json index 9a435846f..2a548914c 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -10712,12 +10712,6 @@ "integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==", "license": "Unlicense" }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", - "license": "ISC" - }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -10821,21 +10815,20 @@ "license": "ISC" }, "node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Glob versions prior to v9 are no longer supported", + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", "license": "ISC", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": 
"^1.11.1" }, - "engines": { - "node": "*" + "bin": { + "glob": "dist/esm/bin.mjs" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -10859,26 +10852,19 @@ "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", "license": "BSD-2-Clause" }, - "node_modules/glob/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/glob/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", "license": "ISC", "dependencies": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^2.0.1" }, "engines": { - "node": "*" + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/global-dirs": { @@ -11792,17 +11778,6 @@ "node": ">=12" } }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "deprecated": "This module is not supported, and leaks memory. Do not use it. 
Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", - "license": "ISC", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", @@ -15570,15 +15545,6 @@ "node": ">= 0.8" } }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, "node_modules/onetime": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", @@ -15955,15 +15921,6 @@ "node": "^12.20.0 || ^14.13.1 || >=16.0.0" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/path-is-inside": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz", @@ -20038,41 +19995,6 @@ "node": ">= 6" } }, - "node_modules/sucrase/node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -21620,12 +21542,6 @@ "url": "https://github.com/chalk/strip-ansi?sponsor=1" } }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "license": "ISC" - }, "node_modules/write-file-atomic": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz", diff --git a/docs/package.json b/docs/package.json index d435c65a9..ca4d02ca1 100644 --- a/docs/package.json +++ b/docs/package.json @@ -31,6 +31,9 @@ "react-dom": "^19.0.0", "remark-code-import": "^1.2.0" }, + "overrides": { + "glob": "^10.5.0" + }, "browserslist": { "production": [ ">0.5%", From 88526669821086212affec113d0438324cec3942 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 11:23:33 -0800 Subject: [PATCH 53/62] chore: remove dead code from openai_compat utility 
(#4194) Removes a bunch of dead code from `openai_compat.py` --- .../utils/inference/openai_compat.py | 209 ------------------ 1 file changed, 209 deletions(-) diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index 32d41ffde..3ce7d361d 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -3,23 +3,10 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections.abc import Iterable from typing import ( Any, ) -from openai.types.chat import ( - ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam, -) - -try: - from openai.types.chat import ( - ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall, - ) -except ImportError: - from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, - ) from openai.types.chat import ( ChatCompletionMessageToolCall, ) @@ -32,18 +19,6 @@ from llama_stack.models.llama.datatypes import ( ToolCall, ToolDefinition, ) -from llama_stack_api import ( - URL, - GreedySamplingStrategy, - ImageContentItem, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TextContentItem, - TopKSamplingStrategy, - TopPSamplingStrategy, - _URLOrData, -) logger = get_logger(name=__name__, category="providers::utils") @@ -73,42 +48,6 @@ class OpenAICompatCompletionResponse(BaseModel): choices: list[OpenAICompatCompletionChoice] -def get_sampling_strategy_options(params: SamplingParams) -> dict: - options = {} - if isinstance(params.strategy, GreedySamplingStrategy): - options["temperature"] = 0.0 - elif isinstance(params.strategy, TopPSamplingStrategy): - if params.strategy.temperature is not None: - options["temperature"] = params.strategy.temperature - if params.strategy.top_p is not None: - options["top_p"] = params.strategy.top_p - elif isinstance(params.strategy, TopKSamplingStrategy): - options["top_k"] = params.strategy.top_k - else: - raise ValueError(f"Unsupported sampling strategy: {params.strategy}") - - return options - - -def get_sampling_options(params: SamplingParams | None) -> dict: - if not params: - return {} - - options = {} - if params: - options.update(get_sampling_strategy_options(params)) - if params.max_tokens: - options["max_tokens"] = params.max_tokens - - if params.repetition_penalty is not None and params.repetition_penalty != 1.0: - options["repeat_penalty"] = params.repetition_penalty - - if params.stop is not None: - options["stop"] = params.stop - - return options - - def text_from_choice(choice) -> str: if hasattr(choice, "delta") and choice.delta: return choice.delta.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations @@ -253,154 +192,6 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: return out -def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str: - """ - Convert a StopReason to an OpenAI chat completion finish_reason. - """ - return { - StopReason.end_of_turn: "stop", - StopReason.end_of_message: "tool_calls", - StopReason.out_of_tokens: "length", - }.get(stop_reason, "stop") - - -def _convert_openai_finish_reason(finish_reason: str) -> StopReason: - """ - Convert an OpenAI chat completion finish_reason to a StopReason. 
- - finish_reason: Literal["stop", "length", "tool_calls", ...] - - stop: model hit a natural stop point or a provided stop sequence - - length: maximum number of tokens specified in the request was reached - - tool_calls: model called a tool - - -> - - class StopReason(Enum): - end_of_turn = "end_of_turn" - end_of_message = "end_of_message" - out_of_tokens = "out_of_tokens" - """ - - # TODO(mf): are end_of_turn and end_of_message semantics correct? - return { - "stop": StopReason.end_of_turn, - "length": StopReason.out_of_tokens, - "tool_calls": StopReason.end_of_message, - }.get(finish_reason, StopReason.end_of_turn) - - -def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]: - lls_tools: list[ToolDefinition] = [] - if not tools: - return lls_tools - - for tool in tools: - tool_fn = tool.get("function", {}) - tool_name = tool_fn.get("name", None) - tool_desc = tool_fn.get("description", None) - tool_params = tool_fn.get("parameters", None) - - lls_tool = ToolDefinition( - tool_name=tool_name, - description=tool_desc, - input_schema=tool_params, # Pass through entire JSON Schema - ) - lls_tools.append(lls_tool) - return lls_tools - - -def _convert_openai_request_response_format( - response_format: OpenAIResponseFormatParam | None = None, -): - if not response_format: - return None - # response_format can be a dict or a pydantic model - response_format_dict = dict(response_format) # type: ignore[arg-type] # OpenAIResponseFormatParam union needs dict conversion - if response_format_dict.get("type", "") == "json_schema": - return JsonSchemaResponseFormat( - type="json_schema", # type: ignore[arg-type] # Literal["json_schema"] incompatible with expected type - json_schema=response_format_dict.get("json_schema", {}).get("schema", ""), - ) - return None - - -def _convert_openai_tool_calls( - tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall], -) -> list[ToolCall]: - """ - Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall. - - OpenAI ChatCompletionMessageToolCall: - id: str - function: Function - type: Literal["function"] - - OpenAI Function: - arguments: str - name: str - - -> - - ToolCall: - call_id: str - tool_name: str - arguments: Dict[str, ...] 
- """ - if not tool_calls: - return [] # CompletionMessage tool_calls is not optional - - return [ - ToolCall( - call_id=call.id, - tool_name=call.function.name, - arguments=call.function.arguments, - ) - for call in tool_calls - ] - - -def _convert_openai_sampling_params( - max_tokens: int | None = None, - temperature: float | None = None, - top_p: float | None = None, -) -> SamplingParams: - sampling_params = SamplingParams() - - if max_tokens: - sampling_params.max_tokens = max_tokens - - # Map an explicit temperature of 0 to greedy sampling - if temperature == 0: - sampling_params.strategy = GreedySamplingStrategy() - else: - # OpenAI defaults to 1.0 for temperature and top_p if unset - if temperature is None: - temperature = 1.0 - if top_p is None: - top_p = 1.0 - sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p) # type: ignore[assignment] # SamplingParams.strategy union accepts this type - - return sampling_params - - -def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None): - if content is None: - return "" - if isinstance(content, str): - return content - elif isinstance(content, list): - return [openai_content_to_content(c) for c in content] - elif hasattr(content, "type"): - if content.type == "text": - return TextContentItem(type="text", text=content.text) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track - elif content.type == "image_url": - return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track - else: - raise ValueError(f"Unknown content type: {content.type}") - else: - raise ValueError(f"Unknown content type: {content}") - - async def prepare_openai_completion_params(**params): async def _prepare_value(value: Any) -> Any: new_value = value From 0757d5a9170011f691c955438736726e6b5fd70e Mon Sep 17 00:00:00 2001 From: Ian Miller <75687988+r3v5@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:48:11 +0000 Subject: [PATCH 54/62] feat(responses)!: implement support for OpenAI compatible prompts in Responses API (#3965) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This PR is responsible for providing actual implementation of OpenAI compatible prompts in Responses API. This is the follow up PR with actual implementation after introducing #3942 The need of this functionality was initiated in #3514. > Note, https://github.com/llamastack/llama-stack/pull/3514 is divided on three separate PRs. Current PR is the third of three. Closes #3321 ## Test Plan Manual testing, CI workflow with added unit tests Comprehensive manual testing with new implementation: **Test Prompts with Images with text on them in Responses API:** I used this image for testing purposes: [iphone 17 image](https://github.com/user-attachments/assets/9e2ee821-e394-4bbd-b1c8-d48a3fa315de) 1. Upload an image: ``` curl -X POST http://localhost:8321/v1/files \ -H "Content-Type: multipart/form-data" \ -F "file=@/Users/ianmiller/iphone.jpeg" \ -F "purpose=assistants" ``` `{"object":"file","id":"file-d6d375f238e14f21952cc40246bc8504","bytes":556241,"created_at":1761750049,"expires_at":1793286049,"filename":"iphone.jpeg","purpose":"assistants"}%` 2. Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "You are a product analysis expert. 
Analyze the following product:\n\nProduct Name: {{product_name}}\nDescription: {{description}}\n\nImage: {{product_photo}}\n\nProvide a detailed analysis including quality assessment, target audience, and pricing recommendations.", "variables": ["product_name", "description", "product_photo"] }' ``` `{"prompt":"You are a product analysis expert. Analyze the following product:\n\nProduct Name: {{product_name}}\nDescription: {{description}}\n\nImage: {{product_photo}}\n\nProvide a detailed analysis including quality assessment, target audience, and pricing recommendations.","version":1,"prompt_id":"pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62","variables":["product_name","description","product_photo"],"is_default":false}%` 3. Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Accept: application/json, text/event-stream" \ -H "Content-Type: application/json" \ -d '{ "input": "Please analyze this product", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62", "version": "1", "variables": { "product_name": { "type": "input_text", "text": "iPhone 17 Pro Max" }, "product_photo": { "type": "input_image", "file_id": "file-d6d375f238e14f21952cc40246bc8504", "detail": "high" } } } }' ``` `{"created_at":1761750427,"error":null,"id":"resp_f897f914-e3b8-4783-8223-3ed0d32fcbc6","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"### Product Analysis: iPhone 17 Pro Max\n\n**Quality Assessment:**\n\n- **Display & Design:**\n - The 6.9-inch display is large, ideal for streaming and productivity.\n - Anti-reflective technology and 120Hz refresh rate enhance viewing experience, providing smoother visuals and reducing glare.\n - Titanium frame suggests a premium build, offering durability and a sleek appearance.\n\n- **Performance:**\n - The Apple A19 Pro chip promises significant performance improvements, likely leading to faster processing and efficient multitasking.\n - 12GB RAM is substantial for a smartphone, ensuring smooth operation for demanding apps and games.\n\n- **Camera System:**\n - The triple 48MP camera setup (wide, ultra-wide, telephoto) is designed for versatile photography needs, capturing high-resolution photos and videos.\n - The 24MP front camera will appeal to selfie enthusiasts and content creators needing quality front-facing shots.\n\n- **Connectivity:**\n - Wi-Fi 7 support indicates future-proof wireless capabilities, providing faster and more reliable internet connectivity.\n\n**Target Audience:**\n\n- **Tech Enthusiasts:** Individuals interested in cutting-edge technology and performance.\n- **Content Creators:** Users who need a robust camera system for photo and video production.\n- **Luxury Consumers:** Those who prefer premium materials and top-of-the-line specs.\n- **Professionals:** Users who require efficient multitasking and productivity features.\n\n**Pricing Recommendations:**\n\n- Given the premium specifications, a higher price point is expected. 
Consider pricing competitively within the high-end smartphone market while justifying cost through unique features like the titanium frame and advanced connectivity options.\n- Positioning around the $1,200 to $1,500 range would align with expectations for top-tier devices, catering to its target audience while ensuring profitability.\n\nOverall, the iPhone 17 Pro Max showcases a blend of innovative features and premium design, aimed at users seeking high performance and superior aesthetics.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_66f4d844-4d9e-4102-80fc-eb75b34b6dbd","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62","variables":{"product_name":{"text":"iPhone 17 Pro Max","type":"input_text"},"product_photo":{"detail":"high","type":"input_image","file_id":"file-d6d375f238e14f21952cc40246bc8504","image_url":null}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":830,"output_tokens":394,"total_tokens":1224,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` **Test Prompts with PDF files in Responses API:** I used this PDF file for testing purposes: [invoicesample.pdf](https://github.com/user-attachments/files/22958943/invoicesample.pdf) 1. Upload PDF: ``` curl -X POST http://localhost:8321/v1/files \ -H "Content-Type: multipart/form-data" \ -F "file=@/Users/ianmiller/invoicesample.pdf" \ -F "purpose=assistants" ``` `{"object":"file","id":"file-7fbb1043a4bb468cab60ffe4b8631d8e","bytes":149568,"created_at":1761750730,"expires_at":1793286730,"filename":"invoicesample.pdf","purpose":"assistants"}%` 2. Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "You are an accounting and financial analysis expert. Analyze the following invoice document:\n\nInvoice Document: {{invoice_doc}}\n\nProvide a comprehensive analysis", "variables": ["invoice_doc"] }' ``` `{"prompt":"You are an accounting and financial analysis expert. Analyze the following invoice document:\n\nInvoice Document: {{invoice_doc}}\n\nProvide a comprehensive analysis","version":1,"prompt_id":"pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc","variables":["invoice_doc"],"is_default":false}%` 3. 
Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Content-Type: application/json" \ -d '{ "input": "Please provide a detailed analysis of this invoice", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc", "version": "1", "variables": { "invoice_doc": { "type": "input_file", "file_id": "file-7fbb1043a4bb468cab60ffe4b8631d8e", "filename": "invoicesample.pdf" } } } }' ``` `{"created_at":1761750881,"error":null,"id":"resp_da866913-db06-4702-8000-174daed9dbbb","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"Here's a detailed analysis of the invoice provided:\n\n### Seller Information\n- **Business Name:** The invoice features a logo with \"Sunny Farm\" indicating the business identity.\n- **Address:** 123 Somewhere St, Melbourne VIC 3000\n- **Contact Information:** Phone number (03) 1234 5678\n\n### Buyer Information\n- **Name:** Denny Gunawan\n- **Address:** 221 Queen St, Melbourne VIC 3000\n\n### Transaction Details\n- **Invoice Number:** #20130304\n- **Date of Transaction:** Not explicitly mentioned, likely inferred from the invoice number or needs clarification.\n\n### Items Purchased\n1. **Apple**\n - Price: $5.00/kg\n - Quantity: 1 kg\n - Subtotal: $5.00\n\n2. **Orange**\n - Price: $1.99/kg\n - Quantity: 2 kg\n - Subtotal: $3.98\n\n3. **Watermelon**\n - Price: $1.69/kg\n - Quantity: 3 kg\n - Subtotal: $5.07\n\n4. **Mango**\n - Price: $9.56/kg\n - Quantity: 2 kg\n - Subtotal: $19.12\n\n5. **Peach**\n - Price: $2.99/kg\n - Quantity: 1 kg\n - Subtotal: $2.99\n\n### Financial Summary\n- **Subtotal for Items:** $36.00\n- **GST (Goods and Services Tax):** 10% of $36.00, which amounts to $3.60\n- **Total Amount Due:** $39.60\n\n### Notes\n- The invoice includes a placeholder text: \"Lorem ipsum dolor sit amet...\" which is typically used as filler text. This might indicate a section intended for terms, conditions, or additional notes that haven’t been completed.\n\n### Visual and Design Elements\n- The invoice uses a simple and clear layout, featuring the business logo prominently and stating essential information such as contact and transaction details in a structured manner.\n- There is a \"Thank You\" note at the bottom, which adds a professional and courteous touch.\n\n### Considerations\n- Ensure the date of the transaction is clear if there are any future references needed.\n- Replace filler text with relevant terms and conditions or any special instructions pertaining to the transaction.\n\nThis invoice appears standard, representing a small business transaction with clearly itemized products and applicable taxes.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_39f3b39e-4684-4444-8e4d-e7395f88c9dc","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc","variables":{"invoice_doc":{"type":"input_file","file_data":null,"file_id":"file-7fbb1043a4bb468cab60ffe4b8631d8e","file_url":null,"filename":"invoicesample.pdf"}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":529,"output_tokens":513,"total_tokens":1042,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` **Test simple text Prompt in Responses API:** 1. 
Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.", "variables": ["name", "company", "role", "tone"] }' ``` `{"prompt":"Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.","version":1,"prompt_id":"pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef","variables":["name","company","role","tone"],"is_default":false}%` 2. Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Accept: application/json, text/event-stream" \ -H "Content-Type: application/json" \ -d '{ "input": "What is the capital of Ireland?", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef", "version": "1", "variables": { "name": { "type": "input_text", "text": "Alice" }, "company": { "type": "input_text", "text": "Dummy Company" }, "role": { "type": "input_text", "text": "Geography expert" }, "tone": { "type": "input_text", "text": "professional and helpful" } } } }' ``` `{"created_at":1761751097,"error":null,"id":"resp_1b037b95-d9ae-4ad0-8e76-d953897ecaef","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"The capital of Ireland is Dublin.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_8e7c72b6-2aa2-4da6-8e57-da4e12fa3ce2","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef","variables":{"name":{"text":"Alice","type":"input_text"},"company":{"text":"Dummy Company","type":"input_text"},"role":{"text":"Geography expert","type":"input_text"},"tone":{"text":"professional and helpful","type":"input_text"}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":47,"output_tokens":7,"total_tokens":54,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` --- .../inline/agents/meta_reference/__init__.py | 4 +- .../inline/agents/meta_reference/agents.py | 9 +- .../responses/openai_responses.py | 98 +++- .../agents/meta_reference/responses/utils.py | 123 ++++- src/llama_stack/providers/registry/agents.py | 2 + .../meta_reference/test_openai_responses.py | 517 +++++++++++++++++- .../test_openai_responses_conversations.py | 4 + .../test_response_conversion_utils.py | 20 +- .../test_responses_safety_utils.py | 2 + .../meta_reference/test_safety_optional.py | 8 + 10 files changed, 770 insertions(+), 17 deletions(-) diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py index b3fb814e3..9683baf00 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -27,8 +27,10 @@ async def get_provider_impl( deps[Api.tool_runtime], deps[Api.tool_groups], deps[Api.conversations], - policy, + deps[Api.prompts], + deps[Api.files], telemetry_enabled, + policy, ) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 2d5aa6c04..ca419a51a 100644 --- 
a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack_api import ( Agents, Conversations, + Files, Inference, ListOpenAIResponseInputItem, ListOpenAIResponseObject, @@ -22,6 +23,7 @@ from llama_stack_api import ( OpenAIResponsePrompt, OpenAIResponseText, Order, + Prompts, ResponseGuardrail, Safety, ToolGroups, @@ -45,6 +47,8 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, policy: list[AccessRule], telemetry_enabled: bool = False, ): @@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.conversations_api = conversations_api self.telemetry_enabled = telemetry_enabled - + self.prompts_api = prompts_api + self.files_api = files_api self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None self.policy = policy @@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents): vector_io_api=self.vector_io_api, safety_api=self.safety_api, conversations_api=self.conversations_api, + prompts_api=self.prompts_api, + files_api=self.files_api, ) async def shutdown(self) -> None: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 11bfb1417..c8282df69 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import re import time import uuid from collections.abc import AsyncIterator @@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import ( from llama_stack_api import ( ConversationItem, Conversations, + Files, Inference, InvalidConversationIdError, ListOpenAIResponseInputItem, ListOpenAIResponseObject, + OpenAIChatCompletionContentPartParam, OpenAIDeleteResponseObject, OpenAIMessageParam, OpenAIResponseInput, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, OpenAIResponseMessage, @@ -34,7 +39,9 @@ from llama_stack_api import ( OpenAIResponseText, OpenAIResponseTextFormat, OpenAISystemMessageParam, + OpenAIUserMessageParam, Order, + Prompts, ResponseGuardrailSpec, Safety, ToolGroups, @@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator from .tool_executor import ToolExecutor from .types import ChatCompletionContext, ToolContext from .utils import ( + convert_response_content_to_chat_content, convert_response_input_to_chat_messages, convert_response_text_to_chat_response_format, extract_guardrail_ids, @@ -69,6 +77,8 @@ class OpenAIResponsesImpl: vector_io_api: VectorIO, # VectorIO safety_api: Safety | None, conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, ): self.inference_api = inference_api self.tool_groups_api = tool_groups_api @@ -82,6 +92,8 @@ class OpenAIResponsesImpl: tool_runtime_api=tool_runtime_api, vector_io_api=vector_io_api, ) + self.prompts_api = prompts_api + self.files_api = files_api async def _prepend_previous_response( self, @@ -122,11 +134,13 @@ class OpenAIResponsesImpl: # Use stored messages directly and convert only new input message_adapter = TypeAdapter(list[OpenAIMessageParam]) messages = message_adapter.validate_python(previous_response.messages) - new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages) + new_messages = await convert_response_input_to_chat_messages( + input, previous_messages=messages, files_api=self.files_api + ) messages.extend(new_messages) else: # Backward compatibility: reconstruct from inputs - messages = await convert_response_input_to_chat_messages(all_input) + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) tool_context.recover_tools_from_previous_response(previous_response) elif conversation is not None: @@ -138,7 +152,7 @@ class OpenAIResponsesImpl: all_input = input if not conversation_items.data: # First turn - just convert the new input - messages = await convert_response_input_to_chat_messages(input) + messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api) else: if not stored_messages: all_input = conversation_items.data @@ -154,14 +168,82 @@ class OpenAIResponsesImpl: all_input = input messages = stored_messages or [] - new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages) + new_messages = await convert_response_input_to_chat_messages( + all_input, previous_messages=messages, files_api=self.files_api + ) messages.extend(new_messages) else: all_input = input - messages = await convert_response_input_to_chat_messages(all_input) + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) return all_input, messages, tool_context + async def _prepend_prompt( + self, + messages: list[OpenAIMessageParam], + openai_response_prompt: OpenAIResponsePrompt | None, + ) -> None: 
+ """Prepend prompt template to messages, resolving text/image/file variables. + + :param messages: List of OpenAIMessageParam objects + :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables + :returns: string of utf-8 characters + """ + if not openai_response_prompt or not openai_response_prompt.id: + return + + prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None + cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version) + + if not cur_prompt or not cur_prompt.prompt: + return + + cur_prompt_text = cur_prompt.prompt + cur_prompt_variables = cur_prompt.variables + + if not openai_response_prompt.variables: + messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text)) + return + + # Validate that all provided variables exist in the prompt + for name in openai_response_prompt.variables.keys(): + if name not in cur_prompt_variables: + raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}") + + # Separate text and media variables + text_substitutions = {} + media_content_parts: list[OpenAIChatCompletionContentPartParam] = [] + + for name, value in openai_response_prompt.variables.items(): + # Text variable found + if isinstance(value, OpenAIResponseInputMessageContentText): + text_substitutions[name] = value.text + + # Media variable found + elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile): + converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api) + if isinstance(converted_parts, list): + media_content_parts.extend(converted_parts) + + # Eg: {{product_photo}} becomes "[Image: product_photo]" + # This gives the model textual context about what media exists in the prompt + var_type = value.type.replace("input_", "").replace("_", " ").title() + text_substitutions[name] = f"[{var_type}: {name}]" + + def replace_variable(match: re.Match[str]) -> str: + var_name = match.group(1).strip() + return str(text_substitutions.get(var_name, match.group(0))) + + pattern = r"\{\{\s*(\w+)\s*\}\}" + processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text) + + # Insert system message with resolved text + messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text)) + + # If we have media, create a new user message because allows to ingest images and files + if media_content_parts: + messages.append(OpenAIUserMessageParam(content=media_content_parts)) + async def get_openai_response( self, response_id: str, @@ -297,6 +379,7 @@ class OpenAIResponsesImpl: input=input, conversation=conversation, model=model, + prompt=prompt, instructions=instructions, previous_response_id=previous_response_id, store=store, @@ -350,6 +433,7 @@ class OpenAIResponsesImpl: instructions: str | None = None, previous_response_id: str | None = None, conversation: str | None = None, + prompt: OpenAIResponsePrompt | None = None, store: bool | None = True, temperature: float | None = None, text: OpenAIResponseText | None = None, @@ -372,6 +456,9 @@ class OpenAIResponsesImpl: if instructions: messages.insert(0, OpenAISystemMessageParam(content=instructions)) + # Prepend reusable prompt (if provided) + await self._prepend_prompt(messages, prompt) + # Structured outputs response_format = await convert_response_text_to_chat_response_format(text) @@ -394,6 +481,7 @@ class OpenAIResponsesImpl: ctx=ctx, response_id=response_id, created_at=created_at, + prompt=prompt, text=text, 
max_infer_iters=max_infer_iters, parallel_tool_calls=parallel_tool_calls, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 25460bcfe..7bbf6bd30 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -5,11 +5,14 @@ # the root directory of this source tree. import asyncio +import base64 +import mimetypes import re import uuid from collections.abc import Sequence from llama_stack_api import ( + Files, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartParam, @@ -18,6 +21,8 @@ from llama_stack_api import ( OpenAIChatCompletionToolCallFunction, OpenAIChoice, OpenAIDeveloperMessageParam, + OpenAIFile, + OpenAIFileFile, OpenAIImageURL, OpenAIJSONSchema, OpenAIMessageParam, @@ -29,6 +34,7 @@ from llama_stack_api import ( OpenAIResponseInput, OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, @@ -37,9 +43,11 @@ from llama_stack_api import ( OpenAIResponseMessage, OpenAIResponseOutputMessageContent, OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAISystemMessageParam, OpenAIToolMessageParam, @@ -49,6 +57,46 @@ from llama_stack_api import ( ) +async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes: + """ + Extract raw bytes from file using the Files API. + + :param file_id: The file identifier (e.g., "file-abc123") + :param files_api: Files API instance + :returns: Raw file content as bytes + :raises: ValueError if file cannot be retrieved + """ + try: + response = await files_api.openai_retrieve_file_content(file_id) + return bytes(response.body) + except Exception as e: + raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e + + +def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str: + """ + Converts raw binary bytes into a safe ASCII text representation for URLs + + :param raw_bytes: the actual bytes that represents file content + :returns: string of utf-8 characters + """ + return base64.b64encode(raw_bytes).decode("utf-8") + + +def construct_data_url(ascii_text: str, mime_type: str | None) -> str: + """ + Construct data url with decoded data inside + + :param ascii_text: ASCII content + :param mime_type: MIME type of file + :returns: data url string (eg. 
data:image/png,base64,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E) + """ + if not mime_type: + mime_type = "application/octet-stream" + + return f"data:{mime_type};base64,{ascii_text}" + + async def convert_chat_choice_to_response_message( choice: OpenAIChoice, citation_files: dict[str, str] | None = None, @@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message( async def convert_response_content_to_chat_content( content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent], + files_api: Files | None, ) -> str | list[OpenAIChatCompletionContentPartParam]: """ Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. The content schemas of each API look similar, but are not exactly the same. + + :param content: The content to convert + :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images) """ if isinstance(content, str): return content @@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content( elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) elif isinstance(content_part, OpenAIResponseInputMessageContentImage): + detail = content_part.detail + image_mime_type = None if content_part.image_url: - image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) + image_url = OpenAIImageURL(url=content_part.image_url, detail=detail) converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + elif content_part.file_id: + if files_api is None: + raise ValueError("file_ids are not supported by this implementation of the Stack") + image_file_response = await files_api.openai_retrieve_file(content_part.file_id) + if image_file_response.filename: + image_mime_type, _ = mimetypes.guess_type(image_file_response.filename) + raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api) + ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes) + image_data_url = construct_data_url(ascii_text, image_mime_type) + image_url = OpenAIImageURL(url=image_data_url, detail=detail) + converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + else: + raise ValueError( + f"Image content must have either 'image_url' or 'file_id'. " + f"Got image_url={content_part.image_url}, file_id={content_part.file_id}" + ) + elif isinstance(content_part, OpenAIResponseInputMessageContentFile): + resolved_file_data = None + file_data = content_part.file_data + file_id = content_part.file_id + file_url = content_part.file_url + filename = content_part.filename + file_mime_type = None + if not any([file_data, file_id, file_url]): + raise ValueError( + f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. 
" + f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}" + ) + if file_id: + if files_api is None: + raise ValueError("file_ids are not supported by this implementation of the Stack") + + file_response = await files_api.openai_retrieve_file(file_id) + if not filename: + filename = file_response.filename + file_mime_type, _ = mimetypes.guess_type(file_response.filename) + raw_file_bytes = await extract_bytes_from_file(file_id, files_api) + ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes) + resolved_file_data = construct_data_url(ascii_text, file_mime_type) + elif file_data: + if file_data.startswith("data:"): + resolved_file_data = file_data + else: + # Raw base64 data, wrap in data URL format + if filename: + file_mime_type, _ = mimetypes.guess_type(filename) + resolved_file_data = construct_data_url(file_data, file_mime_type) + elif file_url: + resolved_file_data = file_url + converted_parts.append( + OpenAIFile( + file=OpenAIFileFile( + file_data=resolved_file_data, + filename=filename, + ) + ) + ) elif isinstance(content_part, str): converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) else: @@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content( async def convert_response_input_to_chat_messages( input: str | list[OpenAIResponseInput], previous_messages: list[OpenAIMessageParam] | None = None, + files_api: Files | None = None, ) -> list[OpenAIMessageParam]: """ Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. :param input: The input to convert :param previous_messages: Optional previous messages to check for function_call references + :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content) """ messages: list[OpenAIMessageParam] = [] if isinstance(input, list): @@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages( elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): # the tool list will be handled separately pass + elif isinstance( + input_item, + OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall, + ): + # these tool calls are tracked internally but not converted to chat messages + pass elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance( input_item, OpenAIResponseMCPApprovalResponse ): @@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages( pass elif isinstance(input_item, OpenAIResponseMessage): # Narrow type to OpenAIResponseMessage which has content and role attributes - content = await convert_response_content_to_chat_content(input_item.content) + content = await convert_response_content_to_chat_content(input_item.content, files_api) message_type = await get_message_type_by_role(input_item.role) if message_type is None: raise ValueError( diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py index e85be99d6..22bb45faf 100644 --- a/src/llama_stack/providers/registry/agents.py +++ b/src/llama_stack/providers/registry/agents.py @@ -34,6 +34,8 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, Api.conversations, + Api.prompts, + Api.files, ], optional_api_dependencies=[ Api.safety, diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 256df6baf..97bccbfe4 100644 --- 
a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -25,6 +25,13 @@ from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, _OpenAIResponseObjectWithInputAndMessages, ) +from llama_stack_api import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIFile, + OpenAIFileObject, + OpenAISystemMessageParam, + Prompt, +) from llama_stack_api.agents import Order from llama_stack_api.inference import ( OpenAIAssistantMessageParam, @@ -38,6 +45,8 @@ from llama_stack_api.inference import ( ) from llama_stack_api.openai_responses import ( ListOpenAIResponseInputItem, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputToolFunction, OpenAIResponseInputToolMCP, @@ -47,6 +56,7 @@ from llama_stack_api.openai_responses import ( OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, OpenAIResponseText, OpenAIResponseTextFormat, WebSearchToolTypes, @@ -98,6 +108,19 @@ def mock_safety_api(): return safety_api +@pytest.fixture +def mock_prompts_api(): + prompts_api = AsyncMock() + return prompts_api + + +@pytest.fixture +def mock_files_api(): + """Mock files API for testing.""" + files_api = AsyncMock() + return files_api + + @pytest.fixture def openai_responses_impl( mock_inference_api, @@ -107,6 +130,8 @@ def openai_responses_impl( mock_vector_io_api, mock_safety_api, mock_conversations_api, + mock_prompts_api, + mock_files_api, ): return OpenAIResponsesImpl( inference_api=mock_inference_api, @@ -116,6 +141,8 @@ def openai_responses_impl( vector_io_api=mock_vector_io_api, safety_api=mock_safety_api, conversations_api=mock_conversations_api, + prompts_api=mock_prompts_api, + files_api=mock_files_api, ) @@ -499,7 +526,7 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() -async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api): +async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api, mock_files_api): """Test creating an OpenAI response with multiple messages.""" # Setup input_messages = [ @@ -710,7 +737,7 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m async def test_create_openai_response_with_instructions_and_multiple_messages( - openai_responses_impl, mock_inference_api + openai_responses_impl, mock_inference_api, mock_files_api ): # Setup input_messages = [ @@ -1242,3 +1269,489 @@ async def test_create_openai_response_with_output_types_as_input( assert stored_with_outputs.input == input_with_output_types assert len(stored_with_outputs.input) == 3 + + +async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api): + """Test creating an OpenAI response with a prompt.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. 
Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=["area_name", "company_name"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "area_name": OpenAIResponseInputMessageContentText(text="geography"), + "company_name": OpenAIResponseInputMessageContentText(text="Dummy Company"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=openai_response_prompt, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.args[0].messages + assert len(sent_messages) == 2 + + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert len(system_messages) == 1 + assert ( + system_messages[0].content + == "You are a helpful geography assistant at Dummy Company. Always provide accurate information." + ) + + user_messages = [msg for msg in sent_messages if msg.role == "user"] + assert len(user_messages) == 1 + assert user_messages[0].content == input_text + + assert result.model == model + assert result.status == "completed" + assert isinstance(result.prompt, OpenAIResponsePrompt) + assert result.prompt.id == prompt_id + assert result.prompt.variables == openai_response_prompt.variables + assert result.prompt.version == "1" + + +async def test_prepend_prompt_successful_without_variables(openai_responses_impl, mock_prompts_api, mock_inference_api): + """Test prepend_prompt function without variables.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful assistant. Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=[], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt(id=prompt_id, version="1") + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=openai_response_prompt, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.args[0].messages + assert len(sent_messages) == 2 + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert system_messages[0].content == "You are a helpful assistant. Always provide accurate information." 
+ + +async def test_prepend_prompt_invalid_variable(openai_responses_impl, mock_prompts_api): + """Test error handling in prepend_prompt function when prompt parameters contain invalid variables.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a {{ role }} assistant.", + prompt_id=prompt_id, + version=1, + variables=["role"], # Only "role" is valid + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "role": OpenAIResponseInputMessageContentText(text="helpful"), + "company": OpenAIResponseInputMessageContentText( + text="Dummy Company" + ), # company is not in prompt.variables + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test prompt")] + + # Execute - should raise ValueError for invalid variable + with pytest.raises(ValueError, match="Variable company not found in prompt"): + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1) + + +async def test_prepend_prompt_not_found(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt function when prompt is not found.""" + prompt_id = "pmpt_nonexistent" + openai_response_prompt = OpenAIResponsePrompt(id=prompt_id, version="1") + + mock_prompts_api.get_prompt.return_value = None # Prompt not found + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test prompt")] + initial_length = len(messages) + + # Execute + result = await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1) + + # Should return None when prompt not found + assert result is None + + # Messages should not be modified + assert len(messages) == initial_length + assert messages[0].content == "Test prompt" + + +async def test_prepend_prompt_variable_substitution(openai_responses_impl, mock_prompts_api): + """Test complex variable substitution with multiple occurrences and special characters in prepend_prompt function.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + + # Support all whitespace variations: {{name}}, {{ name }}, {{ name}}, {{name }}, etc. + prompt = Prompt( + prompt="Hello {{name}}! You are working at {{ company}}. Your role is {{role}} at {{company}}. Remember, {{ name }}, to be {{ tone }}.", + prompt_id=prompt_id, + version=1, + variables=["name", "company", "role", "tone"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "name": OpenAIResponseInputMessageContentText(text="Alice"), + "company": OpenAIResponseInputMessageContentText(text="Dummy Company"), + "role": OpenAIResponseInputMessageContentText(text="AI Assistant"), + "tone": OpenAIResponseInputMessageContentText(text="professional"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + assert len(messages) == 2 + assert isinstance(messages[0], OpenAISystemMessageParam) + expected_content = "Hello Alice! You are working at Dummy Company. Your role is AI Assistant at Dummy Company. Remember, Alice, to be professional." 
+ assert messages[0].content == expected_content + + +async def test_prepend_prompt_with_image_variable(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with image variable - should create placeholder in system message and append image as separate user message.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Analyze this {{product_image}} and describe what you see.", + prompt_id=prompt_id, + version=1, + variables=["product_image"], + is_default=True, + ) + + # Mock file content and file metadata + mock_file_content = b"fake_image_data" + mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": mock_file_content})() + mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject( + object="file", + id="file-abc123", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="product.jpg", + purpose="assistants", + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "product_image": OpenAIResponseInputMessageContentImage( + file_id="file-abc123", + detail="high", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="What do you think?")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Analyze this [Image: product_image] and describe what you see." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "What do you think?" 
+ + # Check new user message with image is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 1 + + # Should be image with data URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url.startswith("data:image/") + assert messages[2].content[0].image_url.detail == "high" + + +async def test_prepend_prompt_with_file_variable(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with file variable - should create placeholder in system message and append file as separate user message.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Review the document {{contract_file}} and summarize key points.", + prompt_id=prompt_id, + version=1, + variables=["contract_file"], + is_default=True, + ) + + # Mock file retrieval + mock_file_content = b"fake_pdf_content" + mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": mock_file_content})() + mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject( + object="file", + id="file-contract-789", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="contract.pdf", + purpose="assistants", + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "contract_file": OpenAIResponseInputMessageContentFile( + file_id="file-contract-789", + filename="contract.pdf", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Please review this.")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Review the document [File: contract_file] and summarize key points." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "Please review this." + + # Check new user message with file is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 1 + + # First part should be file with data URL + assert isinstance(messages[2].content[0], OpenAIFile) + assert messages[2].content[0].file.file_data.startswith("data:application/pdf;base64,") + assert messages[2].content[0].file.filename == "contract.pdf" + assert messages[2].content[0].file.file_id is None + + +async def test_prepend_prompt_with_mixed_variables(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with text, image, and file variables mixed together.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Hello {{name}}! Analyze {{photo}} and review {{document}}. 
Provide insights for {{company}}.", + prompt_id=prompt_id, + version=1, + variables=["name", "photo", "document", "company"], + is_default=True, + ) + + # Mock file retrieval for image and file + mock_image_content = b"fake_image_data" + mock_file_content = b"fake_doc_content" + + async def mock_retrieve_file_content(file_id): + if file_id == "file-photo-123": + return type("obj", (object,), {"body": mock_image_content})() + elif file_id == "file-doc-456": + return type("obj", (object,), {"body": mock_file_content})() + + mock_files_api.openai_retrieve_file_content.side_effect = mock_retrieve_file_content + + def mock_retrieve_file(file_id): + if file_id == "file-photo-123": + return OpenAIFileObject( + object="file", + id="file-photo-123", + bytes=len(mock_image_content), + created_at=1234567890, + expires_at=1234567890, + filename="photo.jpg", + purpose="assistants", + ) + elif file_id == "file-doc-456": + return OpenAIFileObject( + object="file", + id="file-doc-456", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="doc.pdf", + purpose="assistants", + ) + + mock_files_api.openai_retrieve_file.side_effect = mock_retrieve_file + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "name": OpenAIResponseInputMessageContentText(text="Alice"), + "photo": OpenAIResponseInputMessageContentImage(file_id="file-photo-123", detail="auto"), + "document": OpenAIResponseInputMessageContentFile(file_id="file-doc-456", filename="doc.pdf"), + "company": OpenAIResponseInputMessageContentText(text="Acme Corp"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Here's my question.")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has text and placeholders + assert isinstance(messages[0], OpenAISystemMessageParam) + expected_system = "Hello Alice! Analyze [Image: photo] and review [File: document]. Provide insights for Acme Corp." + assert messages[0].content == expected_system + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "Here's my question." 
+ + # Check new user message with media is appended (2 media items) + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 2 + + # First part should be image with data URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url.startswith("data:image/") + + # Second part should be file with data URL + assert isinstance(messages[2].content[1], OpenAIFile) + assert messages[2].content[1].file.file_data.startswith("data:application/pdf;base64,") + assert messages[2].content[1].file.filename == "doc.pdf" + assert messages[2].content[1].file.file_id is None + + +async def test_prepend_prompt_with_image_using_image_url(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt with image variable using image_url instead of file_id.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Describe {{screenshot}}.", + prompt_id=prompt_id, + version=1, + variables=["screenshot"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "screenshot": OpenAIResponseInputMessageContentImage( + image_url="https://example.com/screenshot.png", + detail="low", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="What is this?")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Describe [Image: screenshot]." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "What is this?" 
+ + # Check new user message with image is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + + # Image should use the provided URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url == "https://example.com/screenshot.png" + assert messages[2].content[0].image_url.detail == "low" + + +async def test_prepend_prompt_image_variable_missing_required_fields(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt with image variable that has neither file_id nor image_url - should raise error.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Analyze {{bad_image}}.", + prompt_id=prompt_id, + version=1, + variables=["bad_image"], + is_default=True, + ) + + # Create image content with neither file_id nor image_url + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={"bad_image": OpenAIResponseInputMessageContentImage()}, # No file_id or image_url + ) + + mock_prompts_api.get_prompt.return_value = prompt + messages = [OpenAIUserMessageParam(content="Test")] + + # Execute - should raise ValueError + with pytest.raises(ValueError, match="Image content must have either 'image_url' or 'file_id'"): + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py index fa1ddae78..5a3e6bf21 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py @@ -39,6 +39,8 @@ def responses_impl_with_conversations( mock_vector_io_api, mock_conversations_api, mock_safety_api, + mock_prompts_api, + mock_files_api, ): """Create OpenAIResponsesImpl instance with conversations API.""" return OpenAIResponsesImpl( @@ -49,6 +51,8 @@ def responses_impl_with_conversations( vector_io_api=mock_vector_io_api, conversations_api=mock_conversations_api, safety_api=mock_safety_api, + prompts_api=mock_prompts_api, + files_api=mock_files_api, ) diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py index b7a437686..e496a96e3 100644 --- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py +++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py @@ -5,6 +5,8 @@ # the root directory of this source tree. 
+from unittest.mock import AsyncMock + import pytest from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( @@ -46,6 +48,12 @@ from llama_stack_api.openai_responses import ( ) +@pytest.fixture +def mock_files_api(): + """Mock files API for testing.""" + return AsyncMock() + + class TestConvertChatChoiceToResponseMessage: async def test_convert_string_content(self): choice = OpenAIChoice( @@ -78,17 +86,17 @@ class TestConvertChatChoiceToResponseMessage: class TestConvertResponseContentToChatContent: - async def test_convert_string_content(self): - result = await convert_response_content_to_chat_content("Simple string") + async def test_convert_string_content(self, mock_files_api): + result = await convert_response_content_to_chat_content("Simple string", mock_files_api) assert result == "Simple string" - async def test_convert_text_content_parts(self): + async def test_convert_text_content_parts(self, mock_files_api): content = [ OpenAIResponseInputMessageContentText(text="First part"), OpenAIResponseOutputMessageContentOutputText(text="Second part"), ] - result = await convert_response_content_to_chat_content(content) + result = await convert_response_content_to_chat_content(content, mock_files_api) assert len(result) == 2 assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam) @@ -96,10 +104,10 @@ class TestConvertResponseContentToChatContent: assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam) assert result[1].text == "Second part" - async def test_convert_image_content(self): + async def test_convert_image_content(self, mock_files_api): content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")] - result = await convert_response_content_to_chat_content(content) + result = await convert_response_content_to_chat_content(content, mock_files_api) assert len(result) == 1 assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam) diff --git a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py index d4d1b872a..a914bbef4 100644 --- a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py +++ b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py @@ -30,6 +30,8 @@ def mock_apis(): "vector_io_api": AsyncMock(), "conversations_api": AsyncMock(), "safety_api": AsyncMock(), + "prompts_api": AsyncMock(), + "files_api": AsyncMock(), } diff --git a/tests/unit/providers/agents/meta_reference/test_safety_optional.py b/tests/unit/providers/agents/meta_reference/test_safety_optional.py index b48d38b29..c2311b68f 100644 --- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py +++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py @@ -52,6 +52,8 @@ def mock_deps(): tool_runtime_api = AsyncMock() tool_groups_api = AsyncMock() conversations_api = AsyncMock() + prompts_api = AsyncMock() + files_api = AsyncMock() return { Api.inference: inference_api, @@ -59,6 +61,8 @@ def mock_deps(): Api.tool_runtime: tool_runtime_api, Api.tool_groups: tool_groups_api, Api.conversations: conversations_api, + Api.prompts: prompts_api, + Api.files: files_api, } @@ -144,6 +148,8 @@ class TestGuardrailsFunctionality: vector_io_api=mock_deps[Api.vector_io], safety_api=None, # No Safety API conversations_api=mock_deps[Api.conversations], + prompts_api=mock_deps[Api.prompts], + files_api=mock_deps[Api.files], ) # Test with string guardrail @@ 
-191,6 +197,8 @@ class TestGuardrailsFunctionality: vector_io_api=mock_deps[Api.vector_io], safety_api=None, # No Safety API conversations_api=mock_deps[Api.conversations], + prompts_api=mock_deps[Api.prompts], + files_api=mock_deps[Api.files], ) # Should not raise when no guardrails requested From aa2a7dae07d7ecd9213c3ab4f7fc9fa19eed22cc Mon Sep 17 00:00:00 2001 From: Sam El-Borai Date: Wed, 19 Nov 2025 20:53:20 +0100 Subject: [PATCH 55/62] chore(ci): make stainless workflow more DRY (#4195) # What does this PR do? Addresses feedback from https://github.com/llamastack/llama-stack/pull/4187#discussion_r2542797437 ## Test Plan --- .github/workflows/stainless-builds.yml | 88 +++++++++++--------------- 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml index a18c70887..28869fdd8 100644 --- a/.github/workflows/stainless-builds.yml +++ b/.github/workflows/stainless-builds.yml @@ -43,7 +43,41 @@ env: # Stainless organization dashboard jobs: + compute-branch: + runs-on: ubuntu-latest + outputs: + preview_branch: ${{ steps.compute.outputs.preview_branch }} + base_branch: ${{ steps.compute.outputs.base_branch }} + merge_branch: ${{ steps.compute.outputs.merge_branch }} + steps: + - name: Compute branch names + id: compute + run: | + HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" + BASE_REPO="${{ github.repository }}" + BRANCH_NAME="${{ github.event.pull_request.head.ref }}" + FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" + + if [ "$HEAD_REPO" != "$BASE_REPO" ]; then + # Fork PR: prefix with fork owner for isolation + if [ -z "$FORK_OWNER" ]; then + echo "Error: Fork PR detected but fork owner is empty" >&2 + exit 1 + fi + PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}" + else + # Same-repo PR + PREVIEW_BRANCH="preview/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${BRANCH_NAME}" + fi + + echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT + echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT + echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT + preview: + needs: compute-branch if: github.event.action != 'closed' runs-on: ubuntu-latest permissions: @@ -59,32 +93,6 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 - # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. - # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" - # For same-repo PRs, this creates "preview/fix/issue-123" - - name: Compute branch names - id: branch-names - run: | - HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" - BASE_REPO="${{ github.repository }}" - BRANCH_NAME="${{ github.event.pull_request.head.ref }}" - - if [ "$HEAD_REPO" != "$BASE_REPO" ]; then - # Fork PR: prefix with fork owner for isolation - FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" - PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" - BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}" - else - # Same-repo PR - PREVIEW_BRANCH="preview/${BRANCH_NAME}" - BASE_BRANCH="preview/base/${BRANCH_NAME}" - fi - - echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT - echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT - - # This action builds preview SDKs from the OpenAPI spec changes and - # posts/updates a comment on the PR with build results and links to the preview. 
- name: Run preview builds uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0 with: @@ -97,10 +105,11 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} - branch: ${{ steps.branch-names.outputs.preview_branch }} - base_branch: ${{ steps.branch-names.outputs.base_branch }} + branch: ${{ needs.compute-branch.outputs.preview_branch }} + base_branch: ${{ needs.compute-branch.outputs.base_branch }} merge: + needs: compute-branch if: github.event.action == 'closed' && github.event.pull_request.merged == true runs-on: ubuntu-latest permissions: @@ -116,27 +125,6 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 - # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. - # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" - # For same-repo PRs, this creates "preview/fix/issue-123" - - name: Compute branch names - id: branch-names - run: | - HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" - BASE_REPO="${{ github.repository }}" - BRANCH_NAME="${{ github.event.pull_request.head.ref }}" - - if [ "$HEAD_REPO" != "$BASE_REPO" ]; then - # Fork PR: prefix with fork owner for isolation - FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" - MERGE_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" - else - # Same-repo PR - MERGE_BRANCH="preview/${BRANCH_NAME}" - fi - - echo "merge_branch=${MERGE_BRANCH}" >> $GITHUB_OUTPUT - # Note that this only merges in changes that happened on the last build on # the computed preview branch. It's possible that there are OAS/config # changes that haven't been built, if the preview job didn't finish @@ -155,4 +143,4 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} - merge_branch: ${{ steps.branch-names.outputs.merge_branch }} + merge_branch: ${{ needs.compute-branch.outputs.merge_branch }} From b6ce2428083fd3ec46f4422473dbc2512b835e66 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 13:43:11 -0800 Subject: [PATCH 56/62] chore: update code owners (#4199) Update code owners given changed affiliations, projects, etc. --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8fff470f6..418d3113a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo +* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo From d649c3663e3293a86cf1e9d4d83c91cba7032857 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 14:49:44 -0800 Subject: [PATCH 57/62] fix: enforce allowed_models during inference requests (#4197) The `allowed_models` configuration was only being applied when listing models via the `/v1/models` endpoint, but the actual inference requests weren't checking this restriction. This meant users could directly request any model the provider supports by specifying it in their inference call, completely bypassing the intended cost controls. 
The fix adds validation to all three inference methods (chat completions, completions, and embeddings) that checks the requested model against the allowed_models list before making the provider API call. ### Test plan Added unit tests --- .../providers/utils/inference/openai_mixin.py | 28 ++++- .../utils/inference/test_openai_mixin.py | 102 +++++++++++++++++- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 559ac90ce..30511a341 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return api_key + def _validate_model_allowed(self, provider_model_id: str) -> None: + """ + Validate that the model is in the allowed_models list if configured. + + :param provider_model_id: The provider-specific model ID to validate + :raises ValueError: If the model is not in the allowed_models list + """ + if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models: + raise ValueError( + f"Model '{provider_model_id}' is not in the allowed models list. " + f"Allowed models: {self.config.allowed_models}" + ) + async def _get_provider_model_id(self, model: str) -> str: """ Get the provider-specific model ID from the model store. @@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): Direct OpenAI completion API call. """ # TODO: fix openai_completion to return type compatible with OpenAI's API response + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + completion_kwargs = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, prompt=params.prompt, best_of=params.best_of, echo=params.echo, @@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI chat completion API call. """ + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + messages = params.messages if self.download_images: @@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): messages = [await _localize_image_url(m) for m in messages] request_params = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, messages=messages, frequency_penalty=params.frequency_penalty, function_call=params.function_call, @@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI embeddings API call. 
""" + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + # Build request params conditionally to avoid NotGiven/Omit type mismatch # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven request_params: dict[str, Any] = { - "model": await self._get_provider_model_id(params.model), + "model": provider_model_id, "input": params.input, } if params.encoding_format is not None: diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 5b13a75f4..02d44f2ba 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -15,7 +15,14 @@ from pydantic import BaseModel, Field from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin -from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam +from llama_stack_api import ( + Model, + ModelType, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIUserMessageParam, +) class OpenAIMixinImpl(OpenAIMixin): @@ -834,3 +841,96 @@ class TestOpenAIMixinProviderDataApiKey: error_message = str(exc_info.value) assert "test_api_key" in error_message assert "x-llamastack-provider-data" in error_message + + +class TestOpenAIMixinAllowedModelsInference: + """Test cases for allowed_models enforcement during inference requests""" + + async def test_inference_with_allowed_models(self, mixin, mock_client_context): + """Test that all inference methods succeed with allowed models""" + mixin.config.allowed_models = ["gpt-4", "text-davinci-003", "text-embedding-ada-002"] + + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock(return_value=MagicMock()) + mock_client.completions.create = AsyncMock(return_value=MagicMock()) + mock_embedding_response = MagicMock() + mock_embedding_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])] + mock_embedding_response.usage = MagicMock(prompt_tokens=5, total_tokens=5) + mock_client.embeddings.create = AsyncMock(return_value=mock_embedding_response) + + with mock_client_context(mixin, mock_client): + # Test chat completion + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + mock_client.chat.completions.create.assert_called_once() + + # Test completion + await mixin.openai_completion( + OpenAICompletionRequestWithExtraBody(model="text-davinci-003", prompt="Hello") + ) + mock_client.completions.create.assert_called_once() + + # Test embeddings + await mixin.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-ada-002", input="test text") + ) + mock_client.embeddings.create.assert_called_once() + + async def test_inference_with_disallowed_models(self, mixin, mock_client_context): + """Test that all inference methods fail with disallowed models""" + mixin.config.allowed_models = ["gpt-4"] + + mock_client = MagicMock() + + with mock_client_context(mixin, mock_client): + # Test chat completion with disallowed model + with pytest.raises(ValueError, match="Model 'gpt-4-turbo' is not in the allowed models list"): + await 
mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4-turbo", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + + # Test completion with disallowed model + with pytest.raises(ValueError, match="Model 'text-davinci-002' is not in the allowed models list"): + await mixin.openai_completion( + OpenAICompletionRequestWithExtraBody(model="text-davinci-002", prompt="Hello") + ) + + # Test embeddings with disallowed model + with pytest.raises(ValueError, match="Model 'text-embedding-3-large' is not in the allowed models list"): + await mixin.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-3-large", input="test text") + ) + + mock_client.chat.completions.create.assert_not_called() + mock_client.completions.create.assert_not_called() + mock_client.embeddings.create.assert_not_called() + + async def test_inference_with_no_restrictions(self, mixin, mock_client_context): + """Test that inference succeeds when allowed_models is None or empty list blocks all""" + # Test with None (no restrictions) + assert mixin.config.allowed_models is None + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock(return_value=MagicMock()) + + with mock_client_context(mixin, mock_client): + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="any-model", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + mock_client.chat.completions.create.assert_called_once() + + # Test with empty list (blocks all models) + mixin.config.allowed_models = [] + with mock_client_context(mixin, mock_client): + with pytest.raises(ValueError, match="Model 'gpt-4' is not in the allowed models list"): + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) From acf74cb8df904b16612dbdca4819b2db9b2bb64d Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 16:25:30 -0800 Subject: [PATCH 58/62] feat(ci): add --typescript-only flag to skip Python tests in integration test script (#4201) This adds a `--typescript-only` flag to `scripts/integration-tests.sh` that skips pytest execution entirely while still starting the Llama Stack server (required for TS client tests). The TypeScript client can now be tested independently without Python test dependencies. --- scripts/integration-tests.sh | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 20ecd0c4d..2adef892d 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -20,6 +20,7 @@ TEST_PATTERN="" INFERENCE_MODE="replay" EXTRA_PARAMS="" COLLECT_ONLY=false +TYPESCRIPT_ONLY=false # Function to display usage usage() { @@ -34,6 +35,7 @@ Options: --subdirs STRING Comma-separated list of test subdirectories to run (overrides suite) --pattern STRING Regex pattern to pass to pytest -k --collect-only Collect tests only without running them (skips server startup) + --typescript-only Skip Python tests and run only TypeScript client tests --help Show this help message Suites are defined in tests/integration/suites.py and define which tests to run. 
@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do COLLECT_ONLY=true shift ;; + --typescript-only) + TYPESCRIPT_ONLY=true + shift + ;; --help) usage exit 0 @@ -544,16 +550,23 @@ if [[ -n "$STACK_CONFIG" ]]; then STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG" fi -pytest -s -v $PYTEST_TARGET \ - $STACK_CONFIG_ARG \ - --inference-mode="$INFERENCE_MODE" \ - -k "$PYTEST_PATTERN" \ - $EXTRA_PARAMS \ - --color=yes \ - --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ - --color=yes $EXTRA_PARAMS \ - --capture=tee-sys -exit_code=$? +# Run Python tests unless typescript-only mode +if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then + pytest -s -v $PYTEST_TARGET \ + $STACK_CONFIG_ARG \ + --inference-mode="$INFERENCE_MODE" \ + -k "$PYTEST_PATTERN" \ + $EXTRA_PARAMS \ + --color=yes \ + --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ + --color=yes $EXTRA_PARAMS \ + --capture=tee-sys + exit_code=$? +else + echo "Skipping Python tests (--typescript-only mode)" + exit_code=0 +fi + set +x set -e From dc4665af179e83b8f93d5e3a004e5751761e55a5 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Fri, 21 Nov 2025 09:48:05 -0500 Subject: [PATCH 59/62] feat!: change bedrock bearer token env variable to match AWS docs & boto3 convention (#4152) Rename `AWS_BEDROCK_API_KEY` to `AWS_BEARER_TOKEN_BEDROCK` to align with the naming convention used in AWS Bedrock documentation and the AWS web console UI. This reduces confusion when developers compare LLS docs with AWS docs. Closes #4147 --- docs/docs/providers/inference/remote_bedrock.mdx | 2 +- .../distributions/ci-tests/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/ci-tests/run.yaml | 2 +- .../distributions/starter-gpu/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/starter-gpu/run.yaml | 2 +- .../distributions/starter/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/starter/run.yaml | 2 +- .../providers/remote/inference/bedrock/bedrock.py | 4 ++-- .../providers/remote/inference/bedrock/config.py | 6 +++--- tests/unit/providers/inference/test_bedrock_adapter.py | 4 ++-- tests/unit/providers/inference/test_bedrock_config.py | 4 ++-- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 86bef3000..0b36ea01a 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -22,6 +22,6 @@ AWS Bedrock inference provider using OpenAI compatible endpoint. 
## Sample Configuration ```yaml -api_key: ${env.AWS_BEDROCK_API_KEY:=} +api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} ``` diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index d942c23a4..7721138c7 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 8b1cd2bb2..b791e1488 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 75cc9d188..9c250c05a 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 09c7be5a1..65f9ae326 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index f59c809d2..3314bb9e9 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index 435bb22a7..e88539e6a 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: 
${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 70ee95916..451549db8 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -37,7 +37,7 @@ class BedrockInferenceAdapter(OpenAIMixin): """ config: BedrockConfig - provider_data_api_key_field: str = "aws_bedrock_api_key" + provider_data_api_key_field: str = "aws_bearer_token_bedrock" def get_base_url(self) -> str: """Get base URL for OpenAI client.""" @@ -111,7 +111,7 @@ class BedrockInferenceAdapter(OpenAIMixin): logger.error(f"AWS Bedrock authentication token expired: {error_msg}") raise ValueError( "AWS Bedrock authentication failed: Bearer token has expired. " - "The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. " + "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. " "Please refresh your token by generating a new pre-signed URL with AWS credentials. " "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." ) from e diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py index 631a6e7ef..f31db63aa 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/config.py +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -12,9 +12,9 @@ from llama_stack.providers.utils.inference.model_registry import RemoteInference class BedrockProviderDataValidator(BaseModel): - aws_bedrock_api_key: str | None = Field( + aws_bearer_token_bedrock: str | None = Field( default=None, - description="API key for Amazon Bedrock", + description="API Key (Bearer token) for Amazon Bedrock", ) @@ -27,6 +27,6 @@ class BedrockConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config(cls, **kwargs): return { - "api_key": "${env.AWS_BEDROCK_API_KEY:=}", + "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}", "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", } diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index a20f2860a..2a1ca769b 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -40,8 +40,8 @@ def test_api_key_from_header_overrides_config(): """Test API key from request header overrides config via client property""" config = BedrockConfig(api_key="config-key", region_name="us-east-1") adapter = BedrockInferenceAdapter(config=config) - adapter.provider_data_api_key_field = "aws_bedrock_api_key" - adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bedrock_api_key="header-key")) + adapter.provider_data_api_key_field = "aws_bearer_token_bedrock" + adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bearer_token_bedrock="header-key")) # The client property is where header override happens (in OpenAIMixin) assert adapter.client.api_key == "header-key" diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py index 4c1fd56a2..622080426 100644 --- a/tests/unit/providers/inference/test_bedrock_config.py +++ 
b/tests/unit/providers/inference/test_bedrock_config.py @@ -9,7 +9,7 @@ from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig def test_bedrock_config_defaults_no_env(monkeypatch): """Test BedrockConfig defaults when env vars are not set""" - monkeypatch.delenv("AWS_BEDROCK_API_KEY", raising=False) + monkeypatch.delenv("AWS_BEARER_TOKEN_BEDROCK", raising=False) monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False) config = BedrockConfig() assert config.auth_credential is None @@ -35,5 +35,5 @@ def test_bedrock_config_sample(): sample = BedrockConfig.sample_run_config() assert "api_key" in sample assert "region_name" in sample - assert sample["api_key"] == "${env.AWS_BEDROCK_API_KEY:=}" + assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}" assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}" From 74dceb30da601fac809f7d9d04d83c66c1aac7d2 Mon Sep 17 00:00:00 2001 From: raghotham Date: Sat, 22 Nov 2025 00:30:36 +0530 Subject: [PATCH 60/62] chore: Add @cdoern as a code owner (#4209) We went through the nomination process for CODEOWNERS in the codeowners discord channel. Welcome to the code owners group @cdoern! Thanks for your contributions and we look forward to working with you! --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 418d3113a..75636525e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo +* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern From dabebdd2303af1401c5dd9d92654b530c4db5050 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Fri, 21 Nov 2025 16:12:01 -0500 Subject: [PATCH 61/62] fix: update hard-coded google model names (#4212) # What does this PR do? When we send the model names to Google's openai API, we must use the "google" name prefix. Google does not recognize the "vertexai" model names. Closes #4211 ## Test Plan ```bash uv venv --python python312 . .venv/bin/activate llama stack list-deps starter | xargs -L1 uv pip install llama stack run starter ``` Test that this shows the gemini models with their correct names: ```bash curl http://127.0.0.1:8321/v1/models | jq '.data | map(select(.custom_metadata.provider_id == "vertexai"))' ``` Test that this chat completion works: ```bash curl -X POST -H "Content-Type: application/json" "http://127.0.0.1:8321/v1/chat/completions" -d '{ "model": "vertexai/google/gemini-2.5-flash", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Hello! Can you tell me a joke?" 
} ], "temperature": 1.0, "max_tokens": 256 }' ``` --- src/llama_stack/providers/remote/inference/vertexai/vertexai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py index b91430fd0..7941f8c89 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py +++ b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -51,4 +51,4 @@ class VertexAIInferenceAdapter(OpenAIMixin): :return: An iterable of model IDs """ - return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"] + return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"] From 3434c92a1446cf5eee8147541b030bbbe32c7823 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 23 Nov 2025 22:32:58 -0500 Subject: [PATCH 62/62] chore(github-deps): bump actions/setup-node from 4.1.0 to 6.0.0 (#4216) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4.1.0 to 6.0.0.
Release notes

Sourced from actions/setup-node's releases.

v6.0.0

What's Changed

Breaking Changes

Dependency Upgrades

Full Changelog: https://github.com/actions/setup-node/compare/v5...v6.0.0

v5.0.0

What's Changed

Breaking Changes

This update introduces automatic caching when a valid `packageManager` field is present in your `package.json`. This aims to improve workflow performance and make dependency management more seamless. To disable this automatic caching, set `package-manager-cache: false`:

```yaml
steps:
- uses: actions/checkout@v5
- uses: actions/setup-node@v5
  with:
    package-manager-cache: false
```

Make sure your runner is on version v2.327.1 or later to ensure compatibility with this release. See Release Notes
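For illustration, a minimal `package.json` sketch that would trigger this automatic caching; the package name and the pinned npm version here are hypothetical:

```json
{
  "name": "my-app",
  "packageManager": "npm@10.9.0",
  "dependencies": {}
}
```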

Dependency Upgrades

New Contributors

Full Changelog: https://github.com/actions/setup-node/compare/v4...v5.0.0

v4.4.0

... (truncated)

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/integration-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 8073f6a15..721c3b5a0 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -95,7 +95,7 @@ jobs:
 
       - name: Setup Node.js for TypeScript client tests
         if: ${{ matrix.client == 'server' }}
-        uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
        with:
           node-version: '20'
           cache: 'npm'
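As context, pinned-SHA bumps like this one are typically produced by a Dependabot configuration along these lines; this is a hedged sketch, and the repository's actual `.github/dependabot.yml` may differ:

```yaml
# Hypothetical dependabot config that would generate github-actions bumps
# like the one above; the repo's real .github/dependabot.yml may differ.
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
```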