Merge remote-tracking branch 'upstream/main' into elasticsearch-integration

2025-12-03 18:00:36 +00:00 · 2025-10-31 18:23:42 +01:00 · 2025-10-31 18:23:42 +01:00 · 2407115ee8
commit 2407115ee8
parent 22b27e6275 5f95c1f8cc
1050 changed files with 65153 additions and 2821 deletions
--- a/docs/docs/distributions/configuration.mdx
+++ b/docs/docs/distributions/configuration.mdx
@ -58,13 +58,21 @@ storage:
    sql_default:
      type: sql_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
-  references:
+  stores:
    metadata:
      backend: kv_default
      namespace: registry
    inference:
      backend: sql_default
      table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/docs/docs/distributions/k8s/stack-configmap.yaml
+++ b/docs/docs/distributions/k8s/stack-configmap.yaml
@ -113,13 +113,21 @@ data:
          db: ${env.POSTGRES_DB:=llamastack}
          user: ${env.POSTGRES_USER:=llamastack}
          password: ${env.POSTGRES_PASSWORD:=llamastack}
-      references:
+      stores:
        metadata:
          backend: kv_default
          namespace: registry
        inference:
          backend: sql_default
          table_name: inference_store
+          max_write_queue_size: 10000
+          num_writers: 4
+        conversations:
+          backend: sql_default
+          table_name: openai_conversations
+        prompts:
+          backend: kv_default
+          namespace: prompts
    models:
    - metadata:
        embedding_dimension: 768
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@ -106,6 +106,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
  models:
  - metadata:
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@ -79,6 +79,33 @@ docker run \
  --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 Make sure you have the Llama Stack CLI available.
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@ -127,13 +127,39 @@ docker run \
  -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
  llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
--- a/docs/docs/providers/files/remote_openai.mdx
+++ b/docs/docs/providers/files/remote_openai.mdx
@ -0,0 +1,27 @@
+---
+description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
+sidebar_label: Remote - Openai
+title: remote::openai
+---
+
+# remote::openai
+
+## Description
+
+OpenAI Files API provider for managing files through OpenAI's native file storage service.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `api_key` | `<class 'str'>` | No |  | OpenAI API key for authentication |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No |  | SQL store configuration for file metadata |
+
+## Sample Configuration
+
+```yaml
+api_key: ${env.OPENAI_API_KEY}
+metadata_store:
+  table_name: openai_files_metadata
+  backend: sql_default
+```
--- a/docs/docs/providers/inference/remote_nvidia.mdx
+++ b/docs/docs/providers/inference/remote_nvidia.mdx
@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
 | `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
+| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints.  |

 ## Sample Configuration

--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@ -72,14 +72,14 @@ description: |
  Example with hybrid search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
  )

  # Using RRF ranker
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={
          "mode": "hybrid",
@ -91,7 +91,7 @@ description: |

  # Using weighted ranker
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={
          "mode": "hybrid",
@ -105,7 +105,7 @@ description: |
  Example with explicit vector search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
  )
@ -114,7 +114,7 @@ description: |
  Example with keyword search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
  )
@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
 Example with hybrid search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
 )

 # Using RRF ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@ -296,7 +296,7 @@ response = await vector_io.query_chunks(

 # Using weighted ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
 Example with explicit vector search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
 )
@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
 Example with keyword search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
 )
--- a/docs/notebooks/llamastack_agents_getting_started_examples.ipynb
+++ b/docs/notebooks/llamastack_agents_getting_started_examples.ipynb
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
            'providers/eval/remote_nvidia'
          ],
        },
-        {
-          type: 'category',
-          label: 'Telemetry',
-          collapsed: true,
-          items: [
-            'providers/telemetry/index',
-            'providers/telemetry/inline_meta-reference'
-          ],
-        },
        {
          type: 'category',
          label: 'Batches',
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@ -1414,6 +1414,193 @@
                "deprecated": true
            }
        },
+        "/v1/openai/v1/batches": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A list of batch objects.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBatchesResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "List all batches for the current user.",
+                "description": "List all batches for the current user.",
+                "parameters": [
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "A cursor for pagination; returns batches after this batch ID.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "Number of batches to return (default 20, max 100).",
+                        "required": true,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    }
+                ],
+                "deprecated": true
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The created batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Create a new batch for processing multiple API requests.",
+                "description": "Create a new batch for processing multiple API requests.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateBatchRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": true
+            }
+        },
+        "/v1/openai/v1/batches/{batch_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Retrieve information about a specific batch.",
+                "description": "Retrieve information about a specific batch.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to retrieve.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": true
+            }
+        },
+        "/v1/openai/v1/batches/{batch_id}/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The updated batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Cancel a batch that is in progress.",
+                "description": "Cancel a batch that is in progress.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": true
+            }
+        },
        "/v1/openai/v1/chat/completions": {
            "get": {
                "responses": {
@ -3901,7 +4088,6 @@
                    },
                    "max_tokens": {
                        "type": "integer",
-                        "default": 0,
                        "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                    },
                    "repetition_penalty": {
@ -4391,7 +4577,7 @@
                        "const": "memory_retrieval",
                        "default": "memory_retrieval"
                    },
-                    "vector_db_ids": {
+                    "vector_store_ids": {
                        "type": "string",
                        "description": "The IDs of the vector databases to retrieve context from."
                    },
@ -4405,7 +4591,7 @@
                    "turn_id",
                    "step_id",
                    "step_type",
-                    "vector_db_ids",
+                    "vector_store_ids",
                    "inserted_context"
                ],
                "title": "MemoryRetrievalStep",
@ -6402,6 +6588,451 @@
                "title": "Job",
                "description": "A job execution instance with status tracking."
            },
+            "ListBatchesResponse": {
+                "type": "object",
+                "properties": {
+                    "object": {
+                        "type": "string",
+                        "const": "list",
+                        "default": "list"
+                    },
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "id": {
+                                    "type": "string"
+                                },
+                                "completion_window": {
+                                    "type": "string"
+                                },
+                                "created_at": {
+                                    "type": "integer"
+                                },
+                                "endpoint": {
+                                    "type": "string"
+                                },
+                                "input_file_id": {
+                                    "type": "string"
+                                },
+                                "object": {
+                                    "type": "string",
+                                    "const": "batch"
+                                },
+                                "status": {
+                                    "type": "string",
+                                    "enum": [
+                                        "validating",
+                                        "failed",
+                                        "in_progress",
+                                        "finalizing",
+                                        "completed",
+                                        "expired",
+                                        "cancelling",
+                                        "cancelled"
+                                    ]
+                                },
+                                "cancelled_at": {
+                                    "type": "integer"
+                                },
+                                "cancelling_at": {
+                                    "type": "integer"
+                                },
+                                "completed_at": {
+                                    "type": "integer"
+                                },
+                                "error_file_id": {
+                                    "type": "string"
+                                },
+                                "errors": {
+                                    "type": "object",
+                                    "properties": {
+                                        "data": {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "object",
+                                                "properties": {
+                                                    "code": {
+                                                        "type": "string"
+                                                    },
+                                                    "line": {
+                                                        "type": "integer"
+                                                    },
+                                                    "message": {
+                                                        "type": "string"
+                                                    },
+                                                    "param": {
+                                                        "type": "string"
+                                                    }
+                                                },
+                                                "additionalProperties": false,
+                                                "title": "BatchError"
+                                            }
+                                        },
+                                        "object": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "Errors"
+                                },
+                                "expired_at": {
+                                    "type": "integer"
+                                },
+                                "expires_at": {
+                                    "type": "integer"
+                                },
+                                "failed_at": {
+                                    "type": "integer"
+                                },
+                                "finalizing_at": {
+                                    "type": "integer"
+                                },
+                                "in_progress_at": {
+                                    "type": "integer"
+                                },
+                                "metadata": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "type": "string"
+                                    }
+                                },
+                                "model": {
+                                    "type": "string"
+                                },
+                                "output_file_id": {
+                                    "type": "string"
+                                },
+                                "request_counts": {
+                                    "type": "object",
+                                    "properties": {
+                                        "completed": {
+                                            "type": "integer"
+                                        },
+                                        "failed": {
+                                            "type": "integer"
+                                        },
+                                        "total": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "completed",
+                                        "failed",
+                                        "total"
+                                    ],
+                                    "title": "BatchRequestCounts"
+                                },
+                                "usage": {
+                                    "type": "object",
+                                    "properties": {
+                                        "input_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "input_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "cached_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "cached_tokens"
+                                            ],
+                                            "title": "InputTokensDetails"
+                                        },
+                                        "output_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "output_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "reasoning_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "reasoning_tokens"
+                                            ],
+                                            "title": "OutputTokensDetails"
+                                        },
+                                        "total_tokens": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "input_tokens",
+                                        "input_tokens_details",
+                                        "output_tokens",
+                                        "output_tokens_details",
+                                        "total_tokens"
+                                    ],
+                                    "title": "BatchUsage"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "id",
+                                "completion_window",
+                                "created_at",
+                                "endpoint",
+                                "input_file_id",
+                                "object",
+                                "status"
+                            ],
+                            "title": "Batch"
+                        }
+                    },
+                    "first_id": {
+                        "type": "string"
+                    },
+                    "last_id": {
+                        "type": "string"
+                    },
+                    "has_more": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "object",
+                    "data",
+                    "has_more"
+                ],
+                "title": "ListBatchesResponse",
+                "description": "Response containing a list of batch objects."
+            },
+            "CreateBatchRequest": {
+                "type": "object",
+                "properties": {
+                    "input_file_id": {
+                        "type": "string",
+                        "description": "The ID of an uploaded file containing requests for the batch."
+                    },
+                    "endpoint": {
+                        "type": "string",
+                        "description": "The endpoint to be used for all requests in the batch."
+                    },
+                    "completion_window": {
+                        "type": "string",
+                        "const": "24h",
+                        "description": "The time window within which the batch should be processed."
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Optional metadata for the batch."
+                    },
+                    "idempotency_key": {
+                        "type": "string",
+                        "description": "Optional idempotency key. When provided, enables idempotent behavior."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input_file_id",
+                    "endpoint",
+                    "completion_window"
+                ],
+                "title": "CreateBatchRequest"
+            },
+            "Batch": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "completion_window": {
+                        "type": "string"
+                    },
+                    "created_at": {
+                        "type": "integer"
+                    },
+                    "endpoint": {
+                        "type": "string"
+                    },
+                    "input_file_id": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "batch"
+                    },
+                    "status": {
+                        "type": "string",
+                        "enum": [
+                            "validating",
+                            "failed",
+                            "in_progress",
+                            "finalizing",
+                            "completed",
+                            "expired",
+                            "cancelling",
+                            "cancelled"
+                        ]
+                    },
+                    "cancelled_at": {
+                        "type": "integer"
+                    },
+                    "cancelling_at": {
+                        "type": "integer"
+                    },
+                    "completed_at": {
+                        "type": "integer"
+                    },
+                    "error_file_id": {
+                        "type": "string"
+                    },
+                    "errors": {
+                        "type": "object",
+                        "properties": {
+                            "data": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "code": {
+                                            "type": "string"
+                                        },
+                                        "line": {
+                                            "type": "integer"
+                                        },
+                                        "message": {
+                                            "type": "string"
+                                        },
+                                        "param": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "BatchError"
+                                }
+                            },
+                            "object": {
+                                "type": "string"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "title": "Errors"
+                    },
+                    "expired_at": {
+                        "type": "integer"
+                    },
+                    "expires_at": {
+                        "type": "integer"
+                    },
+                    "failed_at": {
+                        "type": "integer"
+                    },
+                    "finalizing_at": {
+                        "type": "integer"
+                    },
+                    "in_progress_at": {
+                        "type": "integer"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "output_file_id": {
+                        "type": "string"
+                    },
+                    "request_counts": {
+                        "type": "object",
+                        "properties": {
+                            "completed": {
+                                "type": "integer"
+                            },
+                            "failed": {
+                                "type": "integer"
+                            },
+                            "total": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "completed",
+                            "failed",
+                            "total"
+                        ],
+                        "title": "BatchRequestCounts"
+                    },
+                    "usage": {
+                        "type": "object",
+                        "properties": {
+                            "input_tokens": {
+                                "type": "integer"
+                            },
+                            "input_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "cached_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "cached_tokens"
+                                ],
+                                "title": "InputTokensDetails"
+                            },
+                            "output_tokens": {
+                                "type": "integer"
+                            },
+                            "output_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "reasoning_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "reasoning_tokens"
+                                ],
+                                "title": "OutputTokensDetails"
+                            },
+                            "total_tokens": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "input_tokens",
+                            "input_tokens_details",
+                            "output_tokens",
+                            "output_tokens_details",
+                            "total_tokens"
+                        ],
+                        "title": "BatchUsage"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "completion_window",
+                    "created_at",
+                    "endpoint",
+                    "input_file_id",
+                    "object",
+                    "status"
+                ],
+                "title": "Batch"
+            },
            "Order": {
                "type": "string",
                "enum": [
@ -8527,29 +9158,14 @@
            "OpenAIResponseInput": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                        "$ref": "#/components/schemas/OpenAIResponseOutput"
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMessage"
                    }
@ -8592,16 +9208,53 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
                        "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+                        "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                }
            },
+            "OpenAIResponseInputMessageContentFile": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "input_file",
+                        "default": "input_file",
+                        "description": "The type of the input item. Always `input_file`."
+                    },
+                    "file_data": {
+                        "type": "string",
+                        "description": "The data of the file to be sent to the model."
+                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
+                    "file_url": {
+                        "type": "string",
+                        "description": "The URL of the file to be sent to the model."
+                    },
+                    "filename": {
+                        "type": "string",
+                        "description": "The name of the file to be sent to the model."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type"
+                ],
+                "title": "OpenAIResponseInputMessageContentFile",
+                "description": "File content for input messages in OpenAI response format."
+            },
            "OpenAIResponseInputMessageContentImage": {
                "type": "object",
                "properties": {
@ -8629,6 +9282,10 @@
                        "default": "input_image",
                        "description": "Content type identifier, always \"input_image\""
                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
                    "image_url": {
                        "type": "string",
                        "description": "(Optional) URL of the image content"
@ -8992,6 +9649,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -9416,6 +10077,32 @@
                "title": "OpenAIResponseOutputMessageWebSearchToolCall",
                "description": "Web search tool call output message for OpenAI responses."
            },
+            "OpenAIResponsePrompt": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "description": "Unique identifier of the prompt template"
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+                        },
+                        "description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
+                    },
+                    "version": {
+                        "type": "string",
+                        "description": "Version number of the prompt to use (defaults to latest if not specified)"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id"
+                ],
+                "title": "OpenAIResponsePrompt",
+                "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
+            },
            "OpenAIResponseText": {
                "type": "object",
                "properties": {
@ -9786,6 +10473,10 @@
                        "type": "string",
                        "description": "The underlying LLM used for completions."
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Prompt object with ID, version, and variables."
+                    },
                    "instructions": {
                        "type": "string"
                    },
@ -9874,6 +10565,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -13442,6 +14137,11 @@
            "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
            "x-displayName": "Agents"
        },
+        {
+            "name": "Batches",
+            "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of OpenAI client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+            "x-displayName": "Batches"
+        },
        {
            "name": "Benchmarks",
            "description": ""
@ -13492,6 +14192,7 @@
            "name": "Operations",
            "tags": [
                "Agents",
+                "Batches",
                "Benchmarks",
                "DatasetIO",
                "Datasets",
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -1012,6 +1012,141 @@ paths:
          schema:
            type: string
      deprecated: true
+  /v1/openai/v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: true
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: true
+  /v1/openai/v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: true
+  /v1/openai/v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: true
  /v1/openai/v1/chat/completions:
    get:
      responses:
@ -2862,7 +2997,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
-          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's
@ -3253,7 +3387,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
          type: string
          description: >-
             The IDs of the vector stores to retrieve context from.
@ -3266,7 +3400,7 @@ components:
        - turn_id
        - step_id
        - step_type
-        - vector_db_ids
+        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@ -4737,6 +4871,331 @@ components:
      title: Job
      description: >-
        A job execution instance with status tracking.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
+                    type: integer
+                  output_tokens_details:
+                    type: object
+                    properties:
+                      reasoning_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - reasoning_tokens
+                    title: OutputTokensDetails
+                  total_tokens:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - input_tokens
+                  - input_tokens_details
+                  - output_tokens
+                  - output_tokens_details
+                  - total_tokens
+                title: BatchUsage
+            additionalProperties: false
+            required:
+              - id
+              - completion_window
+              - created_at
+              - endpoint
+              - input_file_id
+              - object
+              - status
+            title: Batch
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ListBatchesResponse
+      description: >-
+        Response containing a list of batch objects.
+    CreateBatchRequest:
+      type: object
+      properties:
+        input_file_id:
+          type: string
+          description: >-
+            The ID of an uploaded file containing requests for the batch.
+        endpoint:
+          type: string
+          description: >-
+            The endpoint to be used for all requests in the batch.
+        completion_window:
+          type: string
+          const: 24h
+          description: >-
+            The time window within which the batch should be processed.
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: Optional metadata for the batch.
+        idempotency_key:
+          type: string
+          description: >-
+            Optional idempotency key. When provided, enables idempotent behavior.
+      additionalProperties: false
+      required:
+        - input_file_id
+        - endpoint
+        - completion_window
+      title: CreateBatchRequest
+    Batch:
+      type: object
+      properties:
+        id:
+          type: string
+        completion_window:
+          type: string
+        created_at:
+          type: integer
+        endpoint:
+          type: string
+        input_file_id:
+          type: string
+        object:
+          type: string
+          const: batch
+        status:
+          type: string
+          enum:
+            - validating
+            - failed
+            - in_progress
+            - finalizing
+            - completed
+            - expired
+            - cancelling
+            - cancelled
+        cancelled_at:
+          type: integer
+        cancelling_at:
+          type: integer
+        completed_at:
+          type: integer
+        error_file_id:
+          type: string
+        errors:
+          type: object
+          properties:
+            data:
+              type: array
+              items:
+                type: object
+                properties:
+                  code:
+                    type: string
+                  line:
+                    type: integer
+                  message:
+                    type: string
+                  param:
+                    type: string
+                additionalProperties: false
+                title: BatchError
+            object:
+              type: string
+          additionalProperties: false
+          title: Errors
+        expired_at:
+          type: integer
+        expires_at:
+          type: integer
+        failed_at:
+          type: integer
+        finalizing_at:
+          type: integer
+        in_progress_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        model:
+          type: string
+        output_file_id:
+          type: string
+        request_counts:
+          type: object
+          properties:
+            completed:
+              type: integer
+            failed:
+              type: integer
+            total:
+              type: integer
+          additionalProperties: false
+          required:
+            - completed
+            - failed
+            - total
+          title: BatchRequestCounts
+        usage:
+          type: object
+          properties:
+            input_tokens:
+              type: integer
+            input_tokens_details:
+              type: object
+              properties:
+                cached_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - cached_tokens
+              title: InputTokensDetails
+            output_tokens:
+              type: integer
+            output_tokens_details:
+              type: object
+              properties:
+                reasoning_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - reasoning_tokens
+              title: OutputTokensDetails
+            total_tokens:
+              type: integer
+          additionalProperties: false
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+          title: BatchUsage
+      additionalProperties: false
+      required:
+        - id
+        - completion_window
+        - created_at
+        - endpoint
+        - input_file_id
+        - object
+        - status
+      title: Batch
    Order:
      type: string
      enum:
@ -6370,14 +6829,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    "OpenAIResponseInputFunctionToolCallOutput":
      type: object
@ -6408,11 +6862,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            (Optional) The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            (Optional) The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            (Optional) The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@ -6433,6 +6920,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@ -6703,6 +7194,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -7042,6 +7537,30 @@ components:
        OpenAIResponseOutputMessageWebSearchToolCall
      description: >-
        Web search tool call output message for OpenAI responses.
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary of variable names to OpenAIResponseInputMessageContent structure
+            for template substitution. The substitution values can either be strings,
+            or other Response input types like images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
    OpenAIResponseText:
      type: object
      properties:
@ -7299,6 +7818,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@ -7376,6 +7899,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -10196,6 +10723,19 @@ tags:

      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
+  - name: Batches
+    description: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale. The API is designed to allow
+      use of OpenAI client libraries for seamless integration.
+
+
+      This API provides the following extensions:
+       - idempotent batch creation
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: Batches
  - name: Benchmarks
    description: ''
  - name: DatasetIO
@ -10241,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@ -2376,7 +2376,6 @@
                    },
                    "max_tokens": {
                        "type": "integer",
-                        "default": 0,
                        "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                    },
                    "repetition_penalty": {
@ -2866,7 +2865,7 @@
                        "const": "memory_retrieval",
                        "default": "memory_retrieval"
                    },
-                    "vector_db_ids": {
+                    "vector_store_ids": {
                        "type": "string",
                        "description": "The IDs of the vector databases to retrieve context from."
                    },
@ -2880,7 +2879,7 @@
                    "turn_id",
                    "step_id",
                    "step_type",
-                    "vector_db_ids",
+                    "vector_store_ids",
                    "inserted_context"
                ],
                "title": "MemoryRetrievalStep",
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -1695,7 +1695,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
-          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's
@ -2086,7 +2085,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.
@ -2099,7 +2098,7 @@ components:
        - turn_id
        - step_id
        - step_type
-        - vector_db_ids
+        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -40,6 +40,193 @@
        }
    ],
    "paths": {
+        "/v1/batches": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A list of batch objects.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBatchesResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "List all batches for the current user.",
+                "description": "List all batches for the current user.",
+                "parameters": [
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "A cursor for pagination; returns batches after this batch ID.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "Number of batches to return (default 20, max 100).",
+                        "required": true,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    }
+                ],
+                "deprecated": false
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The created batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Create a new batch for processing multiple API requests.",
+                "description": "Create a new batch for processing multiple API requests.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateBatchRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
+        "/v1/batches/{batch_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Retrieve information about a specific batch.",
+                "description": "Retrieve information about a specific batch.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to retrieve.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": false
+            }
+        },
+        "/v1/batches/{batch_id}/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The updated batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Cancel a batch that is in progress.",
+                "description": "Cancel a batch that is in progress.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": false
+            }
+        },
        "/v1/chat/completions": {
            "get": {
                "responses": {
@ -4005,6 +4192,451 @@
                "title": "Error",
                "description": "Error response from the API. Roughly follows RFC 7807."
            },
+            "ListBatchesResponse": {
+                "type": "object",
+                "properties": {
+                    "object": {
+                        "type": "string",
+                        "const": "list",
+                        "default": "list"
+                    },
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "id": {
+                                    "type": "string"
+                                },
+                                "completion_window": {
+                                    "type": "string"
+                                },
+                                "created_at": {
+                                    "type": "integer"
+                                },
+                                "endpoint": {
+                                    "type": "string"
+                                },
+                                "input_file_id": {
+                                    "type": "string"
+                                },
+                                "object": {
+                                    "type": "string",
+                                    "const": "batch"
+                                },
+                                "status": {
+                                    "type": "string",
+                                    "enum": [
+                                        "validating",
+                                        "failed",
+                                        "in_progress",
+                                        "finalizing",
+                                        "completed",
+                                        "expired",
+                                        "cancelling",
+                                        "cancelled"
+                                    ]
+                                },
+                                "cancelled_at": {
+                                    "type": "integer"
+                                },
+                                "cancelling_at": {
+                                    "type": "integer"
+                                },
+                                "completed_at": {
+                                    "type": "integer"
+                                },
+                                "error_file_id": {
+                                    "type": "string"
+                                },
+                                "errors": {
+                                    "type": "object",
+                                    "properties": {
+                                        "data": {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "object",
+                                                "properties": {
+                                                    "code": {
+                                                        "type": "string"
+                                                    },
+                                                    "line": {
+                                                        "type": "integer"
+                                                    },
+                                                    "message": {
+                                                        "type": "string"
+                                                    },
+                                                    "param": {
+                                                        "type": "string"
+                                                    }
+                                                },
+                                                "additionalProperties": false,
+                                                "title": "BatchError"
+                                            }
+                                        },
+                                        "object": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "Errors"
+                                },
+                                "expired_at": {
+                                    "type": "integer"
+                                },
+                                "expires_at": {
+                                    "type": "integer"
+                                },
+                                "failed_at": {
+                                    "type": "integer"
+                                },
+                                "finalizing_at": {
+                                    "type": "integer"
+                                },
+                                "in_progress_at": {
+                                    "type": "integer"
+                                },
+                                "metadata": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "type": "string"
+                                    }
+                                },
+                                "model": {
+                                    "type": "string"
+                                },
+                                "output_file_id": {
+                                    "type": "string"
+                                },
+                                "request_counts": {
+                                    "type": "object",
+                                    "properties": {
+                                        "completed": {
+                                            "type": "integer"
+                                        },
+                                        "failed": {
+                                            "type": "integer"
+                                        },
+                                        "total": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "completed",
+                                        "failed",
+                                        "total"
+                                    ],
+                                    "title": "BatchRequestCounts"
+                                },
+                                "usage": {
+                                    "type": "object",
+                                    "properties": {
+                                        "input_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "input_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "cached_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "cached_tokens"
+                                            ],
+                                            "title": "InputTokensDetails"
+                                        },
+                                        "output_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "output_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "reasoning_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "reasoning_tokens"
+                                            ],
+                                            "title": "OutputTokensDetails"
+                                        },
+                                        "total_tokens": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "input_tokens",
+                                        "input_tokens_details",
+                                        "output_tokens",
+                                        "output_tokens_details",
+                                        "total_tokens"
+                                    ],
+                                    "title": "BatchUsage"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "id",
+                                "completion_window",
+                                "created_at",
+                                "endpoint",
+                                "input_file_id",
+                                "object",
+                                "status"
+                            ],
+                            "title": "Batch"
+                        }
+                    },
+                    "first_id": {
+                        "type": "string"
+                    },
+                    "last_id": {
+                        "type": "string"
+                    },
+                    "has_more": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "object",
+                    "data",
+                    "has_more"
+                ],
+                "title": "ListBatchesResponse",
+                "description": "Response containing a list of batch objects."
+            },
+            "CreateBatchRequest": {
+                "type": "object",
+                "properties": {
+                    "input_file_id": {
+                        "type": "string",
+                        "description": "The ID of an uploaded file containing requests for the batch."
+                    },
+                    "endpoint": {
+                        "type": "string",
+                        "description": "The endpoint to be used for all requests in the batch."
+                    },
+                    "completion_window": {
+                        "type": "string",
+                        "const": "24h",
+                        "description": "The time window within which the batch should be processed."
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Optional metadata for the batch."
+                    },
+                    "idempotency_key": {
+                        "type": "string",
+                        "description": "Optional idempotency key. When provided, enables idempotent behavior."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input_file_id",
+                    "endpoint",
+                    "completion_window"
+                ],
+                "title": "CreateBatchRequest"
+            },
+            "Batch": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "completion_window": {
+                        "type": "string"
+                    },
+                    "created_at": {
+                        "type": "integer"
+                    },
+                    "endpoint": {
+                        "type": "string"
+                    },
+                    "input_file_id": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "batch"
+                    },
+                    "status": {
+                        "type": "string",
+                        "enum": [
+                            "validating",
+                            "failed",
+                            "in_progress",
+                            "finalizing",
+                            "completed",
+                            "expired",
+                            "cancelling",
+                            "cancelled"
+                        ]
+                    },
+                    "cancelled_at": {
+                        "type": "integer"
+                    },
+                    "cancelling_at": {
+                        "type": "integer"
+                    },
+                    "completed_at": {
+                        "type": "integer"
+                    },
+                    "error_file_id": {
+                        "type": "string"
+                    },
+                    "errors": {
+                        "type": "object",
+                        "properties": {
+                            "data": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "code": {
+                                            "type": "string"
+                                        },
+                                        "line": {
+                                            "type": "integer"
+                                        },
+                                        "message": {
+                                            "type": "string"
+                                        },
+                                        "param": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "BatchError"
+                                }
+                            },
+                            "object": {
+                                "type": "string"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "title": "Errors"
+                    },
+                    "expired_at": {
+                        "type": "integer"
+                    },
+                    "expires_at": {
+                        "type": "integer"
+                    },
+                    "failed_at": {
+                        "type": "integer"
+                    },
+                    "finalizing_at": {
+                        "type": "integer"
+                    },
+                    "in_progress_at": {
+                        "type": "integer"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "output_file_id": {
+                        "type": "string"
+                    },
+                    "request_counts": {
+                        "type": "object",
+                        "properties": {
+                            "completed": {
+                                "type": "integer"
+                            },
+                            "failed": {
+                                "type": "integer"
+                            },
+                            "total": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "completed",
+                            "failed",
+                            "total"
+                        ],
+                        "title": "BatchRequestCounts"
+                    },
+                    "usage": {
+                        "type": "object",
+                        "properties": {
+                            "input_tokens": {
+                                "type": "integer"
+                            },
+                            "input_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "cached_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "cached_tokens"
+                                ],
+                                "title": "InputTokensDetails"
+                            },
+                            "output_tokens": {
+                                "type": "integer"
+                            },
+                            "output_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "reasoning_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "reasoning_tokens"
+                                ],
+                                "title": "OutputTokensDetails"
+                            },
+                            "total_tokens": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "input_tokens",
+                            "input_tokens_details",
+                            "output_tokens",
+                            "output_tokens_details",
+                            "total_tokens"
+                        ],
+                        "title": "BatchUsage"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "completion_window",
+                    "created_at",
+                    "endpoint",
+                    "input_file_id",
+                    "object",
+                    "status"
+                ],
+                "title": "Batch"
+            },
            "Order": {
                "type": "string",
                "enum": [
@ -5696,16 +6328,53 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
                        "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+                        "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                }
            },
+            "OpenAIResponseInputMessageContentFile": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "input_file",
+                        "default": "input_file",
+                        "description": "The type of the input item. Always `input_file`."
+                    },
+                    "file_data": {
+                        "type": "string",
+                        "description": "(Optional) The data of the file to be sent to the model."
+                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
+                    "file_url": {
+                        "type": "string",
+                        "description": "(Optional) The URL of the file to be sent to the model."
+                    },
+                    "filename": {
+                        "type": "string",
+                        "description": "(Optional) The name of the file to be sent to the model."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type"
+                ],
+                "title": "OpenAIResponseInputMessageContentFile",
+                "description": "File content for input messages in OpenAI response format."
+            },
            "OpenAIResponseInputMessageContentImage": {
                "type": "object",
                "properties": {
@ -5733,6 +6402,10 @@
                        "default": "input_image",
                        "description": "Content type identifier, always \"input_image\""
                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
                    "image_url": {
                        "type": "string",
                        "description": "(Optional) URL of the image content"
@ -7305,29 +7978,14 @@
            "OpenAIResponseInput": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                        "$ref": "#/components/schemas/OpenAIResponseOutput"
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMessage"
                    }
@ -7536,6 +8194,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -7631,6 +8293,32 @@
                    }
                }
            },
+            "OpenAIResponsePrompt": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "description": "Unique identifier of the prompt template"
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+                        },
+                        "description": "Dictionary mapping variable names to OpenAIResponseInputMessageContent values used for template substitution. Each value is text content (`input_text`), an image (`input_image`), or a file (`input_file`)."
+                    },
+                    "version": {
+                        "type": "string",
+                        "description": "Version number of the prompt to use (defaults to latest if not specified)"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id"
+                ],
+                "title": "OpenAIResponsePrompt",
+                "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
+            },
            "OpenAIResponseText": {
                "type": "object",
                "properties": {
@ -8001,6 +8689,10 @@
                        "type": "string",
                        "description": "The underlying LLM used for completions."
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Prompt object with ID, version, and variables."
+                    },
                    "instructions": {
                        "type": "string"
                    },
@ -8089,6 +8781,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -11427,7 +12123,7 @@
                        },
                        "description": "List of documents to index in the RAG system"
                    },
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "ID of the vector database to store the document embeddings"
                    },
@ -11439,7 +12135,7 @@
                "additionalProperties": false,
                "required": [
                    "documents",
-                    "vector_db_id",
+                    "vector_store_id",
                    "chunk_size_in_tokens"
                ],
                "title": "InsertRequest"
@ -11630,7 +12326,7 @@
                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The query content to search for in the indexed documents"
                    },
-                    "vector_db_ids": {
+                    "vector_store_ids": {
                        "type": "array",
                        "items": {
                            "type": "string"
@ -11645,7 +12341,7 @@
                "additionalProperties": false,
                "required": [
                    "content",
-                    "vector_db_ids"
+                    "vector_store_ids"
                ],
                "title": "QueryRequest"
            },
@ -11833,6 +12529,10 @@
                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The content of the chunk, which can be interleaved text, images, or other types."
                    },
+                    "chunk_id": {
+                        "type": "string",
+                        "description": "Unique identifier for the chunk. Must be provided explicitly."
+                    },
                    "metadata": {
                        "type": "object",
                        "additionalProperties": {
@ -11866,10 +12566,6 @@
                        },
                        "description": "Optional embedding for the chunk. If not provided, it will be computed later."
                    },
-                    "stored_chunk_id": {
-                        "type": "string",
-                        "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
-                    },
                    "chunk_metadata": {
                        "$ref": "#/components/schemas/ChunkMetadata",
                        "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11878,6 +12574,7 @@
                "additionalProperties": false,
                "required": [
                    "content",
+                    "chunk_id",
                    "metadata"
                ],
                "title": "Chunk",
@ -11938,7 +12635,7 @@
            "InsertChunksRequest": {
                "type": "object",
                "properties": {
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "The identifier of the vector database to insert the chunks into."
                    },
@ -11956,7 +12653,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "vector_db_id",
+                    "vector_store_id",
                    "chunks"
                ],
                "title": "InsertChunksRequest"
@ -11964,7 +12661,7 @@
            "QueryChunksRequest": {
                "type": "object",
                "properties": {
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "The identifier of the vector database to query."
                    },
@ -12001,7 +12698,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "vector_db_id",
+                    "vector_store_id",
                    "query"
                ],
                "title": "QueryChunksRequest"
@ -13224,6 +13921,11 @@
            "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n  - Supports dynamic `vector_store_ids` per call\n  - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
            "x-displayName": "Agents"
        },
+        {
+            "name": "Batches",
+            "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of OpenAI client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+            "x-displayName": "Batches"
+        },
        {
            "name": "Conversations",
            "description": "Protocol for conversation management operations.",
@ -13297,6 +13999,7 @@
            "name": "Operations",
            "tags": [
                "Agents",
+                "Batches",
                "Conversations",
                "Files",
                "Inference",
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -12,6 +12,141 @@ info:
 servers:
  - url: http://any-hosted-llama-stack.com
 paths:
+  /v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: false
+  /v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
+                    type: integer
+                  output_tokens_details:
+                    type: object
+                    properties:
+                      reasoning_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - reasoning_tokens
+                    title: OutputTokensDetails
+                  total_tokens:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - input_tokens
+                  - input_tokens_details
+                  - output_tokens
+                  - output_tokens_details
+                  - total_tokens
+                title: BatchUsage
+            additionalProperties: false
+            required:
+              - id
+              - completion_window
+              - created_at
+              - endpoint
+              - input_file_id
+              - object
+              - status
+            title: Batch
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ListBatchesResponse
+      description: >-
+        Response containing a list of batch objects.
+    CreateBatchRequest:
+      type: object
+      properties:
+        input_file_id:
+          type: string
+          description: >-
+            The ID of an uploaded file containing requests for the batch.
+        endpoint:
+          type: string
+          description: >-
+            The endpoint to be used for all requests in the batch.
+        completion_window:
+          type: string
+          const: 24h
+          description: >-
+            The time window within which the batch should be processed.
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: Optional metadata for the batch.
+        idempotency_key:
+          type: string
+          description: >-
+            Optional idempotency key. When provided, enables idempotent behavior.
+      additionalProperties: false
+      required:
+        - input_file_id
+        - endpoint
+        - completion_window
+      title: CreateBatchRequest
+    Batch:
+      type: object
+      properties:
+        id:
+          type: string
+        completion_window:
+          type: string
+        created_at:
+          type: integer
+        endpoint:
+          type: string
+        input_file_id:
+          type: string
+        object:
+          type: string
+          const: batch
+        status:
+          type: string
+          enum:
+            - validating
+            - failed
+            - in_progress
+            - finalizing
+            - completed
+            - expired
+            - cancelling
+            - cancelled
+        cancelled_at:
+          type: integer
+        cancelling_at:
+          type: integer
+        completed_at:
+          type: integer
+        error_file_id:
+          type: string
+        errors:
+          type: object
+          properties:
+            data:
+              type: array
+              items:
+                type: object
+                properties:
+                  code:
+                    type: string
+                  line:
+                    type: integer
+                  message:
+                    type: string
+                  param:
+                    type: string
+                additionalProperties: false
+                title: BatchError
+            object:
+              type: string
+          additionalProperties: false
+          title: Errors
+        expired_at:
+          type: integer
+        expires_at:
+          type: integer
+        failed_at:
+          type: integer
+        finalizing_at:
+          type: integer
+        in_progress_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        model:
+          type: string
+        output_file_id:
+          type: string
+        request_counts:
+          type: object
+          properties:
+            completed:
+              type: integer
+            failed:
+              type: integer
+            total:
+              type: integer
+          additionalProperties: false
+          required:
+            - completed
+            - failed
+            - total
+          title: BatchRequestCounts
+        usage:
+          type: object
+          properties:
+            input_tokens:
+              type: integer
+            input_tokens_details:
+              type: object
+              properties:
+                cached_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - cached_tokens
+              title: InputTokensDetails
+            output_tokens:
+              type: integer
+            output_tokens_details:
+              type: object
+              properties:
+                reasoning_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - reasoning_tokens
+              title: OutputTokensDetails
+            total_tokens:
+              type: integer
+          additionalProperties: false
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+          title: BatchUsage
+      additionalProperties: false
+      required:
+        - id
+        - completion_window
+        - created_at
+        - endpoint
+        - input_file_id
+        - object
+        - status
+      title: Batch
    Order:
      type: string
      enum:
@ -4261,11 +4721,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            (Optional) The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            (Optional) The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            (Optional) The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@ -4286,6 +4779,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@ -5522,14 +6019,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputToolFileSearch:
      type: object
@ -5685,6 +6177,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -5758,6 +6254,30 @@ components:
          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary mapping variable names to OpenAIResponseInputMessageContent
+            structures for template substitution. The substitution values can be
+            either strings or other Response input types such as images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
    OpenAIResponseText:
      type: object
      properties:
@ -6015,6 +6535,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@ -6092,6 +6616,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -8654,7 +9182,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
@ -8665,7 +9193,7 @@ components:
      additionalProperties: false
      required:
        - documents
-        - vector_db_id
+        - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:
@ -8836,7 +9364,7 @@ components:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
          type: array
          items:
            type: string
@ -8849,7 +9377,7 @@ components:
      additionalProperties: false
      required:
        - content
-        - vector_db_ids
+        - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object
@ -8977,6 +9505,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -8997,10 +9529,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -9009,6 +9537,7 @@ components:
      additionalProperties: false
      required:
        - content
+        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -9073,7 +9602,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@ -9092,13 +9621,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@ -9118,7 +9647,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
@ -10075,6 +10604,19 @@ tags:

      - `background`
    x-displayName: Agents
+  - name: Batches
+    description: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale.
+      The API is designed to allow use of OpenAI client libraries for seamless integration.
+
+
+      This API provides the following extensions:
+       - idempotent batch creation
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: Batches
  - name: Conversations
    description: >-
      Protocol for conversation management operations.
@ -10137,6 +10679,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Conversations
      - Files
      - Inference
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@ -40,6 +40,193 @@
        }
    ],
    "paths": {
+        "/v1/batches": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A list of batch objects.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBatchesResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "List all batches for the current user.",
+                "description": "List all batches for the current user.",
+                "parameters": [
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "A cursor for pagination; returns batches after this batch ID.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "Number of batches to return (default 20, max 100).",
+                        "required": false,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    }
+                ],
+                "deprecated": false
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The created batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Create a new batch for processing multiple API requests.",
+                "description": "Create a new batch for processing multiple API requests.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateBatchRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
+        "/v1/batches/{batch_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Retrieve information about a specific batch.",
+                "description": "Retrieve information about a specific batch.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to retrieve.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": false
+            }
+        },
+        "/v1/batches/{batch_id}/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The updated batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Cancel a batch that is in progress.",
+                "description": "Cancel a batch that is in progress.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": false
+            }
+        },
        "/v1/chat/completions": {
            "get": {
                "responses": {
@ -5677,6 +5864,451 @@
                "title": "Error",
                "description": "Error response from the API. Roughly follows RFC 7807."
            },
+            "ListBatchesResponse": {
+                "type": "object",
+                "properties": {
+                    "object": {
+                        "type": "string",
+                        "const": "list",
+                        "default": "list"
+                    },
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "id": {
+                                    "type": "string"
+                                },
+                                "completion_window": {
+                                    "type": "string"
+                                },
+                                "created_at": {
+                                    "type": "integer"
+                                },
+                                "endpoint": {
+                                    "type": "string"
+                                },
+                                "input_file_id": {
+                                    "type": "string"
+                                },
+                                "object": {
+                                    "type": "string",
+                                    "const": "batch"
+                                },
+                                "status": {
+                                    "type": "string",
+                                    "enum": [
+                                        "validating",
+                                        "failed",
+                                        "in_progress",
+                                        "finalizing",
+                                        "completed",
+                                        "expired",
+                                        "cancelling",
+                                        "cancelled"
+                                    ]
+                                },
+                                "cancelled_at": {
+                                    "type": "integer"
+                                },
+                                "cancelling_at": {
+                                    "type": "integer"
+                                },
+                                "completed_at": {
+                                    "type": "integer"
+                                },
+                                "error_file_id": {
+                                    "type": "string"
+                                },
+                                "errors": {
+                                    "type": "object",
+                                    "properties": {
+                                        "data": {
+                                            "type": "array",
+                                            "items": {
+                                                "type": "object",
+                                                "properties": {
+                                                    "code": {
+                                                        "type": "string"
+                                                    },
+                                                    "line": {
+                                                        "type": "integer"
+                                                    },
+                                                    "message": {
+                                                        "type": "string"
+                                                    },
+                                                    "param": {
+                                                        "type": "string"
+                                                    }
+                                                },
+                                                "additionalProperties": false,
+                                                "title": "BatchError"
+                                            }
+                                        },
+                                        "object": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "Errors"
+                                },
+                                "expired_at": {
+                                    "type": "integer"
+                                },
+                                "expires_at": {
+                                    "type": "integer"
+                                },
+                                "failed_at": {
+                                    "type": "integer"
+                                },
+                                "finalizing_at": {
+                                    "type": "integer"
+                                },
+                                "in_progress_at": {
+                                    "type": "integer"
+                                },
+                                "metadata": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "type": "string"
+                                    }
+                                },
+                                "model": {
+                                    "type": "string"
+                                },
+                                "output_file_id": {
+                                    "type": "string"
+                                },
+                                "request_counts": {
+                                    "type": "object",
+                                    "properties": {
+                                        "completed": {
+                                            "type": "integer"
+                                        },
+                                        "failed": {
+                                            "type": "integer"
+                                        },
+                                        "total": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "completed",
+                                        "failed",
+                                        "total"
+                                    ],
+                                    "title": "BatchRequestCounts"
+                                },
+                                "usage": {
+                                    "type": "object",
+                                    "properties": {
+                                        "input_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "input_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "cached_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "cached_tokens"
+                                            ],
+                                            "title": "InputTokensDetails"
+                                        },
+                                        "output_tokens": {
+                                            "type": "integer"
+                                        },
+                                        "output_tokens_details": {
+                                            "type": "object",
+                                            "properties": {
+                                                "reasoning_tokens": {
+                                                    "type": "integer"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "required": [
+                                                "reasoning_tokens"
+                                            ],
+                                            "title": "OutputTokensDetails"
+                                        },
+                                        "total_tokens": {
+                                            "type": "integer"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "input_tokens",
+                                        "input_tokens_details",
+                                        "output_tokens",
+                                        "output_tokens_details",
+                                        "total_tokens"
+                                    ],
+                                    "title": "BatchUsage"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "id",
+                                "completion_window",
+                                "created_at",
+                                "endpoint",
+                                "input_file_id",
+                                "object",
+                                "status"
+                            ],
+                            "title": "Batch"
+                        }
+                    },
+                    "first_id": {
+                        "type": "string"
+                    },
+                    "last_id": {
+                        "type": "string"
+                    },
+                    "has_more": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "object",
+                    "data",
+                    "has_more"
+                ],
+                "title": "ListBatchesResponse",
+                "description": "Response containing a list of batch objects."
+            },
+            "CreateBatchRequest": {
+                "type": "object",
+                "properties": {
+                    "input_file_id": {
+                        "type": "string",
+                        "description": "The ID of an uploaded file containing requests for the batch."
+                    },
+                    "endpoint": {
+                        "type": "string",
+                        "description": "The endpoint to be used for all requests in the batch."
+                    },
+                    "completion_window": {
+                        "type": "string",
+                        "const": "24h",
+                        "description": "The time window within which the batch should be processed."
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Optional metadata for the batch."
+                    },
+                    "idempotency_key": {
+                        "type": "string",
+                        "description": "Optional idempotency key. When provided, enables idempotent behavior."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input_file_id",
+                    "endpoint",
+                    "completion_window"
+                ],
+                "title": "CreateBatchRequest"
+            },
+            "Batch": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "completion_window": {
+                        "type": "string"
+                    },
+                    "created_at": {
+                        "type": "integer"
+                    },
+                    "endpoint": {
+                        "type": "string"
+                    },
+                    "input_file_id": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "batch"
+                    },
+                    "status": {
+                        "type": "string",
+                        "enum": [
+                            "validating",
+                            "failed",
+                            "in_progress",
+                            "finalizing",
+                            "completed",
+                            "expired",
+                            "cancelling",
+                            "cancelled"
+                        ]
+                    },
+                    "cancelled_at": {
+                        "type": "integer"
+                    },
+                    "cancelling_at": {
+                        "type": "integer"
+                    },
+                    "completed_at": {
+                        "type": "integer"
+                    },
+                    "error_file_id": {
+                        "type": "string"
+                    },
+                    "errors": {
+                        "type": "object",
+                        "properties": {
+                            "data": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "code": {
+                                            "type": "string"
+                                        },
+                                        "line": {
+                                            "type": "integer"
+                                        },
+                                        "message": {
+                                            "type": "string"
+                                        },
+                                        "param": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "title": "BatchError"
+                                }
+                            },
+                            "object": {
+                                "type": "string"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "title": "Errors"
+                    },
+                    "expired_at": {
+                        "type": "integer"
+                    },
+                    "expires_at": {
+                        "type": "integer"
+                    },
+                    "failed_at": {
+                        "type": "integer"
+                    },
+                    "finalizing_at": {
+                        "type": "integer"
+                    },
+                    "in_progress_at": {
+                        "type": "integer"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "output_file_id": {
+                        "type": "string"
+                    },
+                    "request_counts": {
+                        "type": "object",
+                        "properties": {
+                            "completed": {
+                                "type": "integer"
+                            },
+                            "failed": {
+                                "type": "integer"
+                            },
+                            "total": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "completed",
+                            "failed",
+                            "total"
+                        ],
+                        "title": "BatchRequestCounts"
+                    },
+                    "usage": {
+                        "type": "object",
+                        "properties": {
+                            "input_tokens": {
+                                "type": "integer"
+                            },
+                            "input_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "cached_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "cached_tokens"
+                                ],
+                                "title": "InputTokensDetails"
+                            },
+                            "output_tokens": {
+                                "type": "integer"
+                            },
+                            "output_tokens_details": {
+                                "type": "object",
+                                "properties": {
+                                    "reasoning_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "reasoning_tokens"
+                                ],
+                                "title": "OutputTokensDetails"
+                            },
+                            "total_tokens": {
+                                "type": "integer"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "input_tokens",
+                            "input_tokens_details",
+                            "output_tokens",
+                            "output_tokens_details",
+                            "total_tokens"
+                        ],
+                        "title": "BatchUsage"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "completion_window",
+                    "created_at",
+                    "endpoint",
+                    "input_file_id",
+                    "object",
+                    "status"
+                ],
+                "title": "Batch"
+            },
            "Order": {
                "type": "string",
                "enum": [
@ -7368,16 +8000,53 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                ],
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
                        "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+                        "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                    }
                }
            },
+            "OpenAIResponseInputMessageContentFile": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "input_file",
+                        "default": "input_file",
+                        "description": "The type of the input item. Always `input_file`."
+                    },
+                    "file_data": {
+                        "type": "string",
+                        "description": "The data of the file to be sent to the model."
+                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
+                    "file_url": {
+                        "type": "string",
+                        "description": "The URL of the file to be sent to the model."
+                    },
+                    "filename": {
+                        "type": "string",
+                        "description": "The name of the file to be sent to the model."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type"
+                ],
+                "title": "OpenAIResponseInputMessageContentFile",
+                "description": "File content for input messages in OpenAI response format."
+            },
            "OpenAIResponseInputMessageContentImage": {
                "type": "object",
                "properties": {
@ -7405,6 +8074,10 @@
                        "default": "input_image",
                        "description": "Content type identifier, always \"input_image\""
                    },
+                    "file_id": {
+                        "type": "string",
+                        "description": "(Optional) The ID of the file to be sent to the model."
+                    },
                    "image_url": {
                        "type": "string",
                        "description": "(Optional) URL of the image content"
@ -8977,29 +9650,14 @@
            "OpenAIResponseInput": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                        "$ref": "#/components/schemas/OpenAIResponseOutput"
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-                    },
-                    {
-                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseMessage"
                    }
@ -9208,6 +9866,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -9303,6 +9965,32 @@
                    }
                }
            },
+            "OpenAIResponsePrompt": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "description": "Unique identifier of the prompt template"
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+                        },
+                        "description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
+                    },
+                    "version": {
+                        "type": "string",
+                        "description": "Version number of the prompt to use (defaults to latest if not specified)"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id"
+                ],
+                "title": "OpenAIResponsePrompt",
+                "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
+            },
            "OpenAIResponseText": {
                "type": "object",
                "properties": {
@ -9673,6 +10361,10 @@
                        "type": "string",
                        "description": "The underlying LLM used for completions."
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Prompt object with ID, version, and variables."
+                    },
                    "instructions": {
                        "type": "string"
                    },
@ -9761,6 +10453,10 @@
                        "type": "string",
                        "description": "(Optional) ID of the previous response in a conversation"
                    },
+                    "prompt": {
+                        "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                        "description": "(Optional) Reference to a prompt template and its variables."
+                    },
                    "status": {
                        "type": "string",
                        "description": "Current status of the response generation"
@ -13099,7 +13795,7 @@
                        },
                        "description": "List of documents to index in the RAG system"
                    },
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "ID of the vector database to store the document embeddings"
                    },
@ -13111,7 +13807,7 @@
                "additionalProperties": false,
                "required": [
                    "documents",
-                    "vector_db_id",
+                    "vector_store_id",
                    "chunk_size_in_tokens"
                ],
                "title": "InsertRequest"
@ -13302,7 +13998,7 @@
                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The query content to search for in the indexed documents"
                    },
-                    "vector_db_ids": {
+                    "vector_store_ids": {
                        "type": "array",
                        "items": {
                            "type": "string"
@ -13317,7 +14013,7 @@
                "additionalProperties": false,
                "required": [
                    "content",
-                    "vector_db_ids"
+                    "vector_store_ids"
                ],
                "title": "QueryRequest"
            },
@ -13505,6 +14201,10 @@
                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The content of the chunk, which can be interleaved text, images, or other types."
                    },
+                    "chunk_id": {
+                        "type": "string",
+                        "description": "Unique identifier for the chunk. Must be provided explicitly."
+                    },
                    "metadata": {
                        "type": "object",
                        "additionalProperties": {
@ -13538,10 +14238,6 @@
                        },
                        "description": "Optional embedding for the chunk. If not provided, it will be computed later."
                    },
-                    "stored_chunk_id": {
-                        "type": "string",
-                        "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
-                    },
                    "chunk_metadata": {
                        "$ref": "#/components/schemas/ChunkMetadata",
                        "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13550,6 +14246,7 @@
                "additionalProperties": false,
                "required": [
                    "content",
+                    "chunk_id",
                    "metadata"
                ],
                "title": "Chunk",
@ -13610,7 +14307,7 @@
            "InsertChunksRequest": {
                "type": "object",
                "properties": {
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "The identifier of the vector database to insert the chunks into."
                    },
@ -13628,7 +14325,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "vector_db_id",
+                    "vector_store_id",
                    "chunks"
                ],
                "title": "InsertChunksRequest"
@ -13636,7 +14333,7 @@
            "QueryChunksRequest": {
                "type": "object",
                "properties": {
-                    "vector_db_id": {
+                    "vector_store_id": {
                        "type": "string",
                        "description": "The identifier of the vector database to query."
                    },
@ -13673,7 +14370,7 @@
                },
                "additionalProperties": false,
                "required": [
-                    "vector_db_id",
+                    "vector_store_id",
                    "query"
                ],
                "title": "QueryChunksRequest"
@ -15452,7 +16149,6 @@
                    },
                    "max_tokens": {
                        "type": "integer",
-                        "default": 0,
                        "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                    },
                    "repetition_penalty": {
@ -15735,7 +16431,7 @@
                        "const": "memory_retrieval",
                        "default": "memory_retrieval"
                    },
-                    "vector_db_ids": {
+                    "vector_store_ids": {
                        "type": "string",
                        "description": "The IDs of the vector databases to retrieve context from."
                    },
@ -15749,7 +16445,7 @@
                    "turn_id",
                    "step_id",
                    "step_type",
-                    "vector_db_ids",
+                    "vector_store_ids",
                    "inserted_context"
                ],
                "title": "MemoryRetrievalStep",
@ -17897,6 +18593,11 @@
            "description": "APIs for creating and interacting with agentic systems.",
            "x-displayName": "Agents"
        },
+        {
+            "name": "Batches",
+            "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+            "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
+        },
        {
            "name": "Benchmarks",
            "description": ""
@ -17991,6 +18692,7 @@
            "name": "Operations",
            "tags": [
                "Agents",
+                "Batches",
                "Benchmarks",
                "Conversations",
                "DatasetIO",
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -15,6 +15,141 @@ info:
 servers:
  - url: http://any-hosted-llama-stack.com
 paths:
+  /v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: false
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: false
+  /v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
+                    type: integer
+                  output_tokens_details:
+                    type: object
+                    properties:
+                      reasoning_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - reasoning_tokens
+                    title: OutputTokensDetails
+                  total_tokens:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - input_tokens
+                  - input_tokens_details
+                  - output_tokens
+                  - output_tokens_details
+                  - total_tokens
+                title: BatchUsage
+            additionalProperties: false
+            required:
+              - id
+              - completion_window
+              - created_at
+              - endpoint
+              - input_file_id
+              - object
+              - status
+            title: Batch
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ListBatchesResponse
+      description: >-
+        Response containing a list of batch objects.
+    CreateBatchRequest:
+      type: object
+      properties:
+        input_file_id:
+          type: string
+          description: >-
+            The ID of an uploaded file containing requests for the batch.
+        endpoint:
+          type: string
+          description: >-
+            The endpoint to be used for all requests in the batch.
+        completion_window:
+          type: string
+          const: 24h
+          description: >-
+            The time window within which the batch should be processed.
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: Optional metadata for the batch.
+        idempotency_key:
+          type: string
+          description: >-
+            Optional idempotency key. When provided, enables idempotent behavior.
+      additionalProperties: false
+      required:
+        - input_file_id
+        - endpoint
+        - completion_window
+      title: CreateBatchRequest
+    Batch:
+      type: object
+      properties:
+        id:
+          type: string
+        completion_window:
+          type: string
+        created_at:
+          type: integer
+        endpoint:
+          type: string
+        input_file_id:
+          type: string
+        object:
+          type: string
+          const: batch
+        status:
+          type: string
+          enum:
+            - validating
+            - failed
+            - in_progress
+            - finalizing
+            - completed
+            - expired
+            - cancelling
+            - cancelled
+        cancelled_at:
+          type: integer
+        cancelling_at:
+          type: integer
+        completed_at:
+          type: integer
+        error_file_id:
+          type: string
+        errors:
+          type: object
+          properties:
+            data:
+              type: array
+              items:
+                type: object
+                properties:
+                  code:
+                    type: string
+                  line:
+                    type: integer
+                  message:
+                    type: string
+                  param:
+                    type: string
+                additionalProperties: false
+                title: BatchError
+            object:
+              type: string
+          additionalProperties: false
+          title: Errors
+        expired_at:
+          type: integer
+        expires_at:
+          type: integer
+        failed_at:
+          type: integer
+        finalizing_at:
+          type: integer
+        in_progress_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        model:
+          type: string
+        output_file_id:
+          type: string
+        request_counts:
+          type: object
+          properties:
+            completed:
+              type: integer
+            failed:
+              type: integer
+            total:
+              type: integer
+          additionalProperties: false
+          required:
+            - completed
+            - failed
+            - total
+          title: BatchRequestCounts
+        usage:
+          type: object
+          properties:
+            input_tokens:
+              type: integer
+            input_tokens_details:
+              type: object
+              properties:
+                cached_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - cached_tokens
+              title: InputTokensDetails
+            output_tokens:
+              type: integer
+            output_tokens_details:
+              type: object
+              properties:
+                reasoning_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - reasoning_tokens
+              title: OutputTokensDetails
+            total_tokens:
+              type: integer
+          additionalProperties: false
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+          title: BatchUsage
+      additionalProperties: false
+      required:
+        - id
+        - completion_window
+        - created_at
+        - endpoint
+        - input_file_id
+        - object
+        - status
+      title: Batch
    Order:
      type: string
      enum:
@ -5474,11 +5934,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@ -5499,6 +5992,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@ -6735,14 +7232,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputToolFileSearch:
      type: object
@ -6898,6 +7390,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -6971,6 +7467,30 @@ components:
          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary mapping variable names to OpenAIResponseInputMessageContent
+            values for template substitution. The substitution values can be either
+            strings or other Response input types, such as images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI-compatible Prompt object that is used in OpenAI responses.
    OpenAIResponseText:
      type: object
      properties:
@ -7228,6 +7748,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@ -7305,6 +7829,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -9867,7 +10395,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
@ -9878,7 +10406,7 @@ components:
      additionalProperties: false
      required:
        - documents
-        - vector_db_id
+        - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:
@ -10049,7 +10577,7 @@ components:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
          type: array
          items:
            type: string
@ -10062,7 +10590,7 @@ components:
      additionalProperties: false
      required:
        - content
-        - vector_db_ids
+        - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object
@ -10190,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -10210,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -10222,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
+        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -10286,7 +10815,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@ -10305,13 +10834,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@ -10331,7 +10860,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
@ -11600,7 +12129,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
-          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's
@ -11850,7 +12378,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.
@ -11863,7 +12391,7 @@ components:
        - turn_id
        - step_id
        - step_type
-        - vector_db_ids
+        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@ -13460,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
+  - name: Batches
+    description: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale. The API is designed to allow
+      use of OpenAI client libraries for seamless integration.
+
+
+      This API provides the following extensions:
+       - idempotent batch creation
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: Batches
  - name: Benchmarks
    description: ''
  - name: Conversations
@ -13534,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO