From 18ab1985da2cb461772bd9a4501a7803555eaa1f Mon Sep 17 00:00:00 2001
From: Yuan Tang <terrytangyuan@gmail.com>
Date: Fri, 28 Feb 2025 12:48:49 -0500
Subject: [PATCH 01/13] fix: Make remote::vllm compatible with vLLM <= v0.6.3
 (#1325)

# What does this PR do?

This makes the request consistent with the OpenAI API and supports vLLM <= v0.6.3: when no tools are provided, the tool config's `tool_choice` is set to `none`.

References:
* https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
* https://github.com/vllm-project/vllm/pull/10000

This fixes the error when running older versions of vLLM:

```
00:50:19.834 [START] /v1/inference/chat-completion
INFO 2025-02-28 00:50:20,203 httpx:1025: HTTP Request: POST https://api-xeai-granite-3-1-8b-instruct.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 235, in endpoint
    return await maybe_await(value)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 201, in maybe_await
    return await value
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper
    result = await method(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/routers/routers.py", line 193, in chat_completion
    return await provider.chat_completion(**params)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper
    result = await method(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 286, in chat_completion
    return await self._nonstream_chat_completion(request, self.client)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 292, in _nonstream_chat_completion
    r = client.chat.completions.create(**params)
  File "/usr/local/lib/python3.10/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/openai/resources/chat/completions/completions.py", line 879, in create
    return self._post(
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1290, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 967, in request
    return self._request(
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1071, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'value_error', 'loc': ('body',), 'msg': 'Value error, When using `tool_choice`, `tools` must be set.', 'input': {'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What model are you?'}]}], 'model': 'granite-3-1-8b-instruct', 'max_tokens': 4096, 'stream': False, 'temperature': 0.0, 'tools': None, 'tool_choice': 'auto'}, 'ctx': {'error': ValueError('When using `tool_choice`, `tools` must be set.')}}]", 'type': 'BadRequestError', 'param': None, 'code': 400}
INFO:     2600:1700:9d20:ac0::49:59736 - "POST /v1/inference/chat-completion HTTP/1.1" 500 Internal Server Error
00:50:20.266 [END] /v1/inference/chat-completion [StatusCode.OK] (431.99ms)
```

## Test Plan

All existing tests pass.

---------

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
---
 llama_stack/providers/remote/inference/vllm/vllm.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 967a3e44d..8ec23cd90 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         tool_config: Optional[ToolConfig] = None,
     ) -> AsyncGenerator:
         model = await self.model_store.get_model(model_id)
+        # This is to be consistent with OpenAI API and support vLLM <= v0.6.3
+        # References:
+        #   * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+        #   * https://github.com/vllm-project/vllm/pull/10000
+        if not tools and tool_config is not None:
+            tool_config.tool_choice = ToolChoice.none
         request = ChatCompletionRequest(
             model=model.provider_resource_id,
             messages=messages,

From c91548fe07ca7ec0fa33cf82443e165594abda9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Fri, 28 Feb 2025 19:01:52 +0100
Subject: [PATCH 02/13] build(container): misc improvements (#1291)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

See individual commit messages.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

Apply this diff:

```
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index da33b8d5..4a702f6f 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -28,5 +28,5 @@ distribution_spec:
     - remote::tavily-search
     - inline::code-interpreter
     - inline::rag-runtime
-    - remote::model-context-protocol
+  container_image: "registry.access.redhat.com/ubi9"
 image_type: conda
```

Then run:

```
CONTAINER_BINARY=podman llama stack build --template ollama --image-type container --image-name registry.access.redhat.com/ubi9
Containerfile created successfully in /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile

FROM registry.access.redhat.com/ubi9
WORKDIR /app

RUN dnf -y update && dnf install -y iputils net-tools wget     vim-minimal python3.11 python3.11-pip python3.11-wheel     python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all

ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn
RUN uv pip install --no-cache llama-stack
RUN pip uninstall -y uv
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"]

# Allows running as non-root user
RUN mkdir -p /.llama /.cache

RUN chmod -R g+rw /app /.llama /.cache

PWD: /Users/leseb/Documents/AI/llama-stack
Containerfile: /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile
+ podman build --platform linux/arm64 -t distribution-ollama:0.1.4 -f /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile . --progress=plain
STEP 1/11: FROM registry.access.redhat.com/ubi9
STEP 2/11: WORKDIR /app
--> Using cache d73dafd4caddd75bc29242a9031258fea759dc571c5bb53a64b5e6d86b3b1335
--> d73dafd4cadd
STEP 3/11: RUN dnf -y update && dnf install -y iputils net-tools wget     vim-minimal python3.11 python3.11-pip python3.11-wheel     python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
--> Using cache b74ad682db149771612a3ea1e4796e0760ab8a4e07c26ad672b46a86d38178c2
--> b74ad682db14
STEP 4/11: ENV UV_SYSTEM_PYTHON=1
--> Using cache 0812a05e6576506aa2fe646cbf239d0cb504cac30a50cb5cf4dc88e49039466d
--> 0812a05e6576
STEP 5/11: RUN pip install uv
--> Using cache a0ce1705f87e52f70f6eb34e66f67b68ebc7c1a073f4d2a664b189cfa89a4e88
--> a0ce1705f87e
STEP 6/11: RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn
Using Python 3.11.9 environment at: /usr
Resolved 107 packages in 1.78s
Downloading kiwisolver (1.4MiB)
Downloading aiohttp (1.6MiB)
Downloading grpcio (5.4MiB)
Downloading nltk (1.4MiB)
Downloading transformers (9.5MiB)
Downloading pydantic-core (1.7MiB)
Downloading lxml (4.6MiB)
Downloading psycopg2-binary (2.7MiB)
Downloading scipy (33.8MiB)
Downloading scikit-learn (12.0MiB)
Downloading tokenizers (2.8MiB)
Downloading fonttools (4.6MiB)
Downloading pymongo (1.3MiB)
Downloading rapidfuzz (1.4MiB)
Downloading sentencepiece (1.2MiB)
Downloading pyarrow (38.7MiB)
Downloading matplotlib (8.1MiB)
Downloading pycryptodomex (2.1MiB)
Downloading pillow (4.2MiB)
Downloading pandas (14.9MiB)
Downloading numpy (13.6MiB)
   Building fire==0.7.0
 Downloaded sentencepiece
 Downloaded kiwisolver
 Downloaded pymongo
 Downloaded rapidfuzz
 Downloaded nltk
 Downloaded aiohttp
      Built fire==0.7.0
 Downloaded pydantic-core
 Downloaded pycryptodomex
 Downloaded psycopg2-binary
 Downloaded tokenizers
 Downloaded pillow
 Downloaded lxml
 Downloaded fonttools
 Downloaded grpcio
 Downloaded matplotlib
 Downloaded transformers
 Downloaded scikit-learn
 Downloaded numpy
 Downloaded pandas
 Downloaded scipy
 Downloaded pyarrow
Prepared 107 packages in 3.03s
Installed 107 packages in 62ms
 + aiohappyeyeballs==2.4.6
 + aiohttp==3.11.13
 + aiosignal==1.3.2
 + aiosqlite==0.21.0
 + annotated-types==0.7.0
 + anyio==4.8.0
 + attrs==25.1.0
 + autoevals==0.0.120
 + backoff==2.2.1
 + blobfile==3.0.0
 + braintrust-core==0.0.58
 + certifi==2025.1.31
 + chardet==5.2.0
 + charset-normalizer==3.4.1
 + chevron==0.14.0
 + chromadb-client==0.6.3
 + click==8.1.8
 + contourpy==1.3.1
 + cycler==0.12.1
 + datasets==3.3.2
 + deprecated==1.2.18
 + dill==0.3.8
 + distro==1.9.0
 + dnspython==2.7.0
 + fastapi==0.115.8
 + filelock==3.17.0
 + fire==0.7.0
 + fonttools==4.56.0
 + frozenlist==1.5.0
 + fsspec==2024.12.0
 + googleapis-common-protos==1.68.0
 + grpcio==1.70.0
 + h11==0.14.0
 + httpcore==1.0.7
 + httpx==0.28.1
 + huggingface-hub==0.29.1
 + idna==3.10
 + importlib-metadata==8.5.0
 + jiter==0.8.2
 + joblib==1.4.2
 + jsonschema==4.23.0
 + jsonschema-specifications==2024.10.1
 + kiwisolver==1.4.8
 + levenshtein==0.26.1
 + lxml==5.3.1
 + matplotlib==3.10.0
 + monotonic==1.6
 + multidict==6.1.0
 + multiprocess==0.70.16
 + nltk==3.9.1
 + numpy==1.26.4
 + ollama==0.4.7
 + openai==1.64.0
 + opentelemetry-api==1.30.0
 + opentelemetry-exporter-otlp-proto-common==1.30.0
 + opentelemetry-exporter-otlp-proto-grpc==1.30.0
 + opentelemetry-exporter-otlp-proto-http==1.30.0
 + opentelemetry-proto==1.30.0
 + opentelemetry-sdk==1.30.0
 + opentelemetry-semantic-conventions==0.51b0
 + orjson==3.10.15
 + overrides==7.7.0
 + packaging==24.2
 + pandas==2.2.3
 + pillow==11.1.0
 + posthog==3.16.0
 + propcache==0.3.0
 + protobuf==5.29.3
 + psycopg2-binary==2.9.10
 + pyarrow==19.0.1
 + pycryptodomex==3.21.0
 + pydantic==2.10.6
 + pydantic-core==2.27.2
 + pymongo==4.11.1
 + pyparsing==3.2.1
 + pypdf==5.3.0
 + python-dateutil==2.9.0.post0
 + pytz==2025.1
 + pyyaml==6.0.2
 + rapidfuzz==3.12.1
 + redis==5.2.1
 + referencing==0.36.2
 + regex==2024.11.6
 + requests==2.32.3
 + rpds-py==0.23.1
 + safetensors==0.5.3
 + scikit-learn==1.6.1
 + scipy==1.15.2
 + sentencepiece==0.2.0
 + six==1.17.0
 + sniffio==1.3.1
 + sqlite-vec==0.1.6
 + starlette==0.45.3
 + tenacity==9.0.0
 + termcolor==2.5.0
 + threadpoolctl==3.5.0
 + tokenizers==0.21.0
 + tqdm==4.67.1
 + transformers==4.49.0
 + typing-extensions==4.12.2
 + tzdata==2025.1
 + urllib3==2.3.0
 + uvicorn==0.34.0
 + wrapt==1.17.2
 + xxhash==3.5.0
 + yarl==1.18.3
 + zipp==3.21.0
--> 5b5b823605a1
STEP 7/11: RUN uv pip install --no-cache llama-stack
Using Python 3.11.9 environment at: /usr
Resolved 55 packages in 1.08s
Downloading setuptools (1.2MiB)
Downloading pygments (1.2MiB)
Downloading llama-models (1.5MiB)
Downloading tiktoken (1.1MiB)
 Downloaded tiktoken
 Downloaded llama-models
 Downloaded pygments
 Downloaded setuptools
Prepared 15 packages in 402ms
Installed 15 packages in 15ms
 + jinja2==3.1.5
 + llama-models==0.1.4
 + llama-stack==0.1.4
 + llama-stack-client==0.1.4
 + markdown-it-py==3.0.0
 + markupsafe==3.0.2
 + mdurl==0.1.2
 + prompt-toolkit==3.0.50
 + pyaml==25.1.0
 + pygments==2.19.1
 + python-dotenv==1.0.1
 + rich==13.9.4
 + setuptools==75.8.2
 + tiktoken==0.9.0
 + wcwidth==0.2.13
--> 38a037443807
STEP 8/11: RUN pip uninstall -y uv
Found existing installation: uv 0.6.3
Uninstalling uv-0.6.3:
  Successfully uninstalled uv-0.6.3
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
--> 54f749dc5ece
STEP 9/11: ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"]
--> 481c138b1982
STEP 10/11: RUN mkdir -p /.llama /.cache
--> 0fc174f014a8
STEP 11/11: RUN chmod -R g+rw /app /.llama /.cache
COMMIT distribution-ollama:0.1.4
--> d41b4ab4b136
Successfully tagged localhost/distribution-ollama:0.1.4
d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3
+ set +x
Success!
Build Successful!
```

UBI9 container successfully builds.

Run the container:

```
podman run d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3 --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:213: Resolved 30 providers
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inner-inference => ollama
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  models => __routing_table__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inference => __autorouted__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inner-vector_io => sqlite-vec
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inner-safety => llama-guard
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  shields => __routing_table__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  safety => __autorouted__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  vector_dbs => __routing_table__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  vector_io => __autorouted__
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inner-tool_runtime => brave-search
INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215:  inner-tool_runtime => tavily-search
```


[//]: # (## Documentation)

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 llama_stack/distribution/build.py           |  3 -
 llama_stack/distribution/build_container.sh | 65 +++++++++++++--------
 2 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py
index 2b43b8128..3d808a4a4 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@@ -15,7 +15,6 @@ from termcolor import cprint
 
 from llama_stack.distribution.datatypes import BuildConfig, Provider
 from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
 from llama_stack.distribution.utils.exec import run_command, run_with_pty
 from llama_stack.distribution.utils.image_types import ImageType
 from llama_stack.providers.datatypes import Api
@@ -103,8 +102,6 @@ def build_image(
             template_or_config,
             image_name,
             container_base,
-            str(build_file_path),
-            str(BUILDS_BASE_DIR / ImageType.container.value),
             " ".join(normal_deps),
         ]
     elif build_config.image_type == ImageType.conda.value:
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 08941a538..9b584a85c 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
@@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
 # mounting is not supported by docker buildx, so we use COPY instead
 USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
 
-if [ "$#" -lt 6 ]; then
+if [ "$#" -lt 4 ]; then
   # This only works for templates
-  echo "Usage: $0 <template_or_config> <image_name> <container_base> <build_file_path> <host_build_dir> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
   exit 1
 fi
 
 set -euo pipefail
 
 template_or_config="$1"
-image_name="$2"
-container_base="$3"
-build_file_path="$4"
-host_build_dir="$5"
-pip_dependencies="$6"
-special_pip_deps="${7:-}"
+shift
+image_name="$1"
+shift
+container_base="$1"
+shift
+pip_dependencies="$1"
+shift
+special_pip_deps="${1:-}"
 
 
 # Define color codes
 RED='\033[0;31m'
-GREEN='\033[0;32m'
 NC='\033[0m' # No Color
 
 CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
@@ -48,7 +49,6 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-}
 TEMP_DIR=$(mktemp -d)
 
 add_to_container() {
-  local input
   output_file="$TEMP_DIR/Containerfile"
   if [ -t 0 ]; then
     printf '%s\n' "$1" >>"$output_file"
@@ -64,9 +64,9 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
 FROM $container_base
 WORKDIR /app
 
-RUN microdnf -y update && microdnf install -y iputils net-tools wget \
+RUN dnf -y update && dnf install -y iputils net-tools wget \
     vim-minimal python3.11 python3.11-pip python3.11-wheel \
-    python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all
+    python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
 
 ENV UV_SYSTEM_PYTHON=1
 RUN pip install uv
@@ -165,6 +165,11 @@ EOF
   fi
 fi
 
+# remove uv after installation
+  add_to_container << EOF
+RUN pip uninstall -y uv
+EOF
+
 # if template_or_config ends with .yaml, it is not a template and we should not use the --template flag
 if [[ "$template_or_config" != *.yaml ]]; then
   add_to_container << EOF
@@ -185,26 +190,31 @@ RUN mkdir -p /.llama /.cache
 RUN chmod -R g+rw /app /.llama /.cache
 EOF
 
-printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
-cat $TEMP_DIR/Containerfile
+printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
+cat "$TEMP_DIR"/Containerfile
 printf "\n"
 
-mounts=""
+# Start building the CLI arguments
+CLI_ARGS=()
+
+# Read CONTAINER_OPTS and put it in an array
+read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
+
 if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
   if [ -n "$LLAMA_STACK_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
   fi
   if [ -n "$LLAMA_MODELS_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount")
   fi
   if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
   fi
 fi
 
 if command -v selinuxenabled &>/dev/null && selinuxenabled; then
   # Disable SELinux labels -- we don't want to relabel the llama-stack source dir
-  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
+  CLI_ARGS+=("--security-opt" "label=disable")
 fi
 
 # Set version tag based on PyPI version
@@ -225,11 +235,11 @@ image_tag="$image_name:$version_tag"
 # Detect platform architecture
 ARCH=$(uname -m)
 if [ -n "$BUILD_PLATFORM" ]; then
-  PLATFORM="--platform $BUILD_PLATFORM"
+  CLI_ARGS+=("--platform" "$BUILD_PLATFORM")  # flag and value must be separate argv entries
 elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
-  PLATFORM="--platform linux/arm64"
+  CLI_ARGS+=("--platform" "linux/arm64")
 elif [ "$ARCH" = "x86_64" ]; then
-  PLATFORM="--platform linux/amd64"
+  CLI_ARGS+=("--platform" "linux/amd64")
 else
   echo "Unsupported architecture: $ARCH"
   exit 1
@@ -238,8 +248,13 @@ fi
 echo "PWD: $(pwd)"
 echo "Containerfile: $TEMP_DIR/Containerfile"
 set -x
-$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \
-  -f "$TEMP_DIR/Containerfile" "." $mounts --progress=plain
+
+$CONTAINER_BINARY build \
+  "${CLI_ARGS[@]}" \
+  -t "$image_tag" \
+  -f "$TEMP_DIR/Containerfile" \
+  "." \
+  --progress=plain
 
 # clean up tmp/configs
 set +x

From 83dc8fbdffdc673dcdd6392ea9f1138fd0a9f412 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee <matt@cs.wisc.edu>
Date: Fri, 28 Feb 2025 12:02:36 -0600
Subject: [PATCH 03/13] test: cleanup embedding model test suite (#1322)

# What does this PR do?

 - skip media tests for models that do not support media
 - skip output_dimension tests for models that do not support it
 - skip task_type tests for models that do not support it
 - provide task_type for models that require it

## Test Plan

`LLAMA_STACK_BASE_URL=http://localhost:8321 pytest -v
tests/client-sdk/inference/test_embedding.py --embedding-model ...`
---
 tests/client-sdk/inference/test_embedding.py | 65 ++++++++++++++++----
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py
index 69d35d05d..075f927f7 100644
--- a/tests/client-sdk/inference/test_embedding.py
+++ b/tests/client-sdk/inference/test_embedding.py
@@ -76,6 +76,25 @@ DUMMY_IMAGE_URL = ImageContentItem(
 )
 DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
 SUPPORTED_PROVIDERS = {"remote::nvidia"}
+MODELS_SUPPORTING_MEDIA = set()  # no models yet; `{}` would create a dict, not a set
+MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
+MODELS_REQUIRING_TASK_TYPE = {
+    "nvidia/llama-3.2-nv-embedqa-1b-v2",
+    "nvidia/nv-embedqa-e5-v5",
+    "nvidia/nv-embedqa-mistral-7b-v2",
+    "snowflake/arctic-embed-l",
+}
+MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
+
+
+def default_task_type(model_id):
+    """
+    Build the extra keyword arguments to pass to an embeddings call for `model_id`.
+    Models listed in MODELS_REQUIRING_TASK_TYPE get a default "query" task type;
+    every other model gets an empty dict (no extra arguments).
+    """
+    needs_task_type = model_id in MODELS_REQUIRING_TASK_TYPE
+    return {"task_type": "query"} if needs_task_type else {}
 
 
 @pytest.mark.parametrize(
@@ -92,7 +111,9 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"}
 def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+    )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
@@ -110,11 +131,14 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents, infere
         "list[url,string,base64,text]",
     ],
 )
-@pytest.mark.xfail(reason="Media is not supported")
 def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+    if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
+        pytest.xfail(f"{embedding_model_id} doesn't support media")
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+    )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
@@ -145,7 +169,10 @@ def test_embedding_truncation(
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+        model_id=embedding_model_id,
+        contents=contents,
+        text_truncation=text_truncation,
+        **default_task_type(embedding_model_id),
     )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == 1
@@ -178,26 +205,36 @@ def test_embedding_truncation_error(
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError):
         llama_stack_client.inference.embeddings(
-            model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
+            model_id=embedding_model_id,
+            contents=[DUMMY_LONG_TEXT],
+            text_truncation=text_truncation,
+            **default_task_type(embedding_model_id),
         )
 
 
-@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
 def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+    if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
+        pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
+    base_response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
+    )
     test_response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+        model_id=embedding_model_id,
+        contents=[DUMMY_STRING],
+        **default_task_type(embedding_model_id),
+        output_dimension=32,
     )
     assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
     assert len(test_response.embeddings[0]) == 32
 
 
-@pytest.mark.xfail(reason="Only valid for model supporting task type")
 def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+    if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
+        pytest.xfail(f"{embedding_model_id} doesn't support task_type")
     query_embedding = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
     )
@@ -220,7 +257,10 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+        model_id=embedding_model_id,
+        contents=[DUMMY_STRING],
+        text_truncation=text_truncation,
+        **default_task_type(embedding_model_id),
     )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == 1
@@ -245,5 +285,8 @@ def test_embedding_text_truncation_error(
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError):
         llama_stack_client.inference.embeddings(
-            model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+            model_id=embedding_model_id,
+            contents=[DUMMY_STRING],
+            text_truncation=text_truncation,
+            **default_task_type(embedding_model_id),
         )

From 5366dab31e3dfecab455a9a6c5f55cc18c7c7ae6 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Sat, 1 Mar 2025 02:03:45 +0800
Subject: [PATCH 04/13] docs: update build doc (#1262)

# What does this PR do?
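Updates the build documentation to match the CLI sources linked below: the
interactive `llama stack build` prompt now lists `venv` as a valid image type,
and the `llama stack run` help description is capitalized.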


https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/run.py#L22


https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/_build.py#L103

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: reidliu <reid201711@gmail.com>
Co-authored-by: reidliu <reid201711@gmail.com>
---
 docs/source/distributions/building_distro.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index 9cb1a402f..20a835201 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co
 llama stack build
 
 > Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
-> Enter the image type you want your Llama Stack to be built as (container or conda): conda
+> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
 
 Llama Stack is composed of several APIs working together. Let's select
 the provider types (implementations) you want to use for these APIs.
@@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i
                        [--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}]
                        config
 
-start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
+Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
 
 positional arguments:
   config                Path to config file to use for the run

From ea4f13cc209e1222aadfab52224a48f687a6d483 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Sat, 1 Mar 2025 02:07:24 +0800
Subject: [PATCH 05/13] chore: add container cmd check (#1306)

# What does this PR do?
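Adds a check in `build_container.sh` that the container binary
(`$CONTAINER_BINARY`) is available before building, using the
`is_command_available` helper sourced from `common.sh`, and reuses that helper
for the existing `selinuxenabled` check.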

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: reidliu <reid201711@gmail.com>
Co-authored-by: reidliu <reid201711@gmail.com>
---
 llama_stack/distribution/build_container.sh | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 9b584a85c..68f8a0863 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -48,6 +48,9 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-}
 
 TEMP_DIR=$(mktemp -d)
 
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+source "$SCRIPT_DIR/common.sh"
+
 add_to_container() {
   output_file="$TEMP_DIR/Containerfile"
   if [ -t 0 ]; then
@@ -58,6 +61,12 @@ add_to_container() {
   fi
 }
 
+# Check if container command is available
+if ! is_command_available $CONTAINER_BINARY; then
+  printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
+  exit 1
+fi
+
 # Update and install UBI9 components if UBI9 base image is used
 if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
   add_to_container << EOF
@@ -212,7 +221,7 @@ if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
   fi
 fi
 
-if command -v selinuxenabled &>/dev/null && selinuxenabled; then
+if is_command_available selinuxenabled && selinuxenabled; then
   # Disable SELinux labels -- we don't want to relabel the llama-stack source dir
   CLI_ARGS+=("--security-opt" "label=disable")
 fi

From 14c442f177591ab336414f0017d4da3d1e20a088 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Sat, 1 Mar 2025 02:08:05 +0800
Subject: [PATCH 06/13] chore: update cmd check (#1293)

# What does this PR do?
Use the `is_command_available` helper instead of an inline `command -v`
check when verifying that `conda` is installed in `build_conda_env.sh`.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: reidliu <reid201711@gmail.com>
Co-authored-by: reidliu <reid201711@gmail.com>
---
 llama_stack/distribution/build_conda_env.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh
index 31b3e1b21..1eac2ee08 100755
--- a/llama_stack/distribution/build_conda_env.sh
+++ b/llama_stack/distribution/build_conda_env.sh
@@ -52,7 +52,7 @@ ensure_conda_env_python310() {
   local python_version="3.10"
 
   # Check if conda command is available
-  if ! command -v conda &>/dev/null; then
+  if ! is_command_available conda; then
     printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2
     exit 1
   fi

From 66cd128ab51aff0b649c8ae59d7ec139a54913c1 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Sat, 1 Mar 2025 02:10:12 +0800
Subject: [PATCH 07/13] docs: update the downloaded list doc (#1266)

# What does this PR do?
[Provide a short summary of what this PR does and why. Link to relevant
issues if applicable.]

Since the `--downloaded` option has been released, update the related
documents.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: reidliu <reid201711@gmail.com>
Co-authored-by: reidliu <reid201711@gmail.com>
---
 .../self_hosted_distro/meta-reference-gpu.md  | 27 +++++++++++++---
 .../meta-reference-quantized-gpu.md           | 27 +++++++++++++---
 .../llama_cli_reference/download_models.md    | 32 +++++++++++++++++++
 .../references/llama_cli_reference/index.md   | 32 +++++++++++++++++++
 .../meta-reference-gpu/doc_template.md        | 27 +++++++++++++---
 .../doc_template.md                           | 27 +++++++++++++---
 6 files changed, 156 insertions(+), 16 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index b183757db..b8d1b1714 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 9aeb7a88b..a49175e22 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md
index 6c791bcb7..ca470f8c2 100644
--- a/docs/source/references/llama_cli_reference/download_models.md
+++ b/docs/source/references/llama_cli_reference/download_models.md
@@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
 **Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
 
 > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+
+## List the downloaded models
+
+List the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md
index a43666963..8a38fc3ae 100644
--- a/docs/source/references/llama_cli_reference/index.md
+++ b/docs/source/references/llama_cli_reference/index.md
@@ -154,6 +154,38 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
 
 > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
 
+## List the downloaded models
+
+List the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
+
 
 ## Understand the models
 The `llama model` command helps you explore the model’s interface.
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 60556a6f3..87438fb6d 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -29,12 +29,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index 2b117120c..e8dfaaf3c 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -31,12 +31,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution

From 6520baebed13c1cbf4227f84d0dcd6e77bcf9ba7 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Fri, 28 Feb 2025 11:10:45 -0800
Subject: [PATCH 08/13] fix: replace eval with json decoding (#1327)

# What does this PR do?

- Using `eval` on server is a security risk
- Replace `eval` with `json.loads`

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
```
pytest -v -s --nbval-lax ./llama-stack/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
```
<img width="747" alt="image"
src="https://github.com/user-attachments/assets/7aff3d95-0b12-4394-b9d0-aeff791eee38"
/>


[//]: # (## Documentation)
---
 .../providers/inline/eval/meta_reference/eval.py       | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py
index 48157b018..a01f7f1f3 100644
--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import json
 from typing import Any, Dict, List, Optional
 
 from tqdm import tqdm
@@ -116,7 +117,7 @@ class MetaReferenceEvalImpl(
         generations = []
         for i, x in tqdm(enumerate(input_rows)):
             assert ColumnName.chat_completion_input.value in x, "Invalid input row"
-            input_messages = eval(str(x[ColumnName.chat_completion_input.value]))
+            input_messages = json.loads(x[ColumnName.chat_completion_input.value])
             input_messages = [UserMessage(**x) for x in input_messages]
 
             # NOTE: only single-turn agent generation is supported. Create a new session for each input row
@@ -158,7 +159,7 @@ class MetaReferenceEvalImpl(
         generations = []
         for x in tqdm(input_rows):
             if ColumnName.completion_input.value in x:
-                input_content = eval(str(x[ColumnName.completion_input.value]))
+                input_content = json.loads(x[ColumnName.completion_input.value])
                 response = await self.inference_api.completion(
                     model=candidate.model,
                     content=input_content,
@@ -166,9 +167,8 @@ class MetaReferenceEvalImpl(
                 )
                 generations.append({ColumnName.generated_answer.value: response.completion_message.content})
             elif ColumnName.chat_completion_input.value in x:
-                chat_completion_input_str = str(x[ColumnName.chat_completion_input.value])
-                input_messages = eval(chat_completion_input_str)
-                input_messages = [UserMessage(**x) for x in input_messages]
+                chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
+                input_messages = [UserMessage(**x) for x in chat_completion_input_json]
                 messages = []
                 if candidate.system_message:
                     messages.append(candidate.system_message)

From 5547ef953c304858d80b1ffa6b0f8226c3aad497 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Fri, 28 Feb 2025 11:16:12 -0800
Subject: [PATCH 09/13] feat: enhance OpenAPI spec to include Error types
 (#1320)

# What does this PR do?

An API spec must talk about Error handling. This was a pretty glaring
omission so far. This PR begins to address it by adding a set of
standard error responses we can attach to all our API calls.

At a future point, we can add specific error types where necessary
(although we should not hurry to do that; it is best done very late.)

## Test Plan

Checked that Stainless SDK generation succeeds.
---
 docs/_static/llama-stack-spec.html            | 1076 ++++++++++++++++-
 docs/_static/llama-stack-spec.yaml            |  894 +++++++++++++-
 docs/openapi_generator/generate.py            |    1 +
 docs/openapi_generator/pyopenapi/generator.py |   82 ++
 docs/openapi_generator/pyopenapi/options.py   |    2 +
 llama_stack/apis/datatypes.py                 |   20 +
 6 files changed, 2073 insertions(+), 2 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2a9f4b6f7..6b98cad90 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -52,6 +52,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -97,6 +109,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -128,6 +152,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -159,6 +195,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -183,6 +231,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -219,6 +279,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -255,6 +327,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -286,6 +370,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -317,6 +413,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -362,6 +470,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -410,6 +530,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -438,6 +570,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -462,6 +606,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -492,6 +648,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -532,6 +700,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -570,6 +750,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -608,6 +800,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -648,6 +852,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -679,6 +895,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -719,6 +947,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -773,6 +1013,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -826,6 +1078,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -863,6 +1127,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -884,6 +1160,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -921,6 +1209,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -942,6 +1242,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -979,6 +1291,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1016,6 +1340,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1046,6 +1382,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1084,6 +1432,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1124,6 +1484,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1154,6 +1526,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1175,6 +1559,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1205,6 +1601,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1242,6 +1650,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1279,6 +1699,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1309,6 +1741,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1337,6 +1781,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1373,6 +1829,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1422,6 +1890,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1443,6 +1923,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1473,6 +1965,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1487,6 +1991,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1511,6 +2027,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1542,6 +2070,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1580,6 +2120,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1609,6 +2161,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1647,6 +2211,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1685,6 +2261,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1697,6 +2285,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1728,6 +2328,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1740,6 +2352,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1771,6 +2395,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1802,6 +2438,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1821,6 +2469,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1852,6 +2512,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1873,6 +2545,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1894,6 +2578,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1932,6 +2628,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1944,6 +2652,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1975,6 +2695,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1994,6 +2726,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2025,6 +2769,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2037,6 +2793,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2068,6 +2836,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2098,6 +2878,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2117,6 +2909,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2141,6 +2945,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2172,6 +2988,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2203,6 +3031,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2234,6 +3074,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2265,6 +3117,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2296,6 +3160,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2332,6 +3208,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2391,6 +3279,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2431,6 +3331,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2455,6 +3367,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2486,6 +3410,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2517,6 +3453,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2548,6 +3496,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2579,6 +3539,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2610,6 +3582,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2623,6 +3607,35 @@
     "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
     "components": {
         "schemas": {
+            "Error": {
+                "type": "object",
+                "properties": {
+                    "status": {
+                        "type": "integer",
+                        "description": "HTTP status code"
+                    },
+                    "title": {
+                        "type": "string",
+                        "description": "Error title, a short summary of the error which is invariant for an error type"
+                    },
+                    "detail": {
+                        "type": "string",
+                        "description": "Error detail, a longer human-readable description of the error"
+                    },
+                    "instance": {
+                        "type": "string",
+                        "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "status",
+                    "title",
+                    "detail"
+                ],
+                "title": "Error",
+                "description": "Error response from the API. Roughly follows RFC 7807."
+            },
             "AppendRowsRequest": {
                 "type": "object",
                 "properties": {
@@ -8741,7 +9754,68 @@
                 "title": "VersionInfo"
             }
         },
-        "responses": {}
+        "responses": {
+            "BadRequest400": {
+                "description": "The request was invalid or malformed",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 400,
+                            "title": "Bad Request",
+                            "detail": "The request was invalid or malformed"
+                        }
+                    }
+                }
+            },
+            "TooManyRequests429": {
+                "description": "The client has sent too many requests in a given amount of time",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 429,
+                            "title": "Too Many Requests",
+                            "detail": "You have exceeded the rate limit. Please try again later."
+                        }
+                    }
+                }
+            },
+            "InternalServerError500": {
+                "description": "The server encountered an unexpected error",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 500,
+                            "title": "Internal Server Error",
+                            "detail": "An unexpected error occurred. Our team has been notified."
+                        }
+                    }
+                }
+            },
+            "DefaultError": {
+                "description": "An unexpected error occurred",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 0,
+                            "title": "Error",
+                            "detail": "An unexpected error occurred"
+                        }
+                    }
+                }
+            }
+        }
     },
     "security": [
         {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a2329e47a..13f7edc4b 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -19,6 +19,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PaginatedRowsResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - DatasetIO
       description: ''
@@ -47,6 +57,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - DatasetIO
       description: ''
@@ -66,6 +86,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchChatCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - BatchInference (Coming Soon)
       description: ''
@@ -85,6 +115,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - BatchInference (Coming Soon)
       description: ''
@@ -100,6 +140,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -124,6 +174,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -149,6 +209,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/CompletionResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -169,6 +239,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentCreateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -188,6 +268,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentSessionCreateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -217,6 +307,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -246,6 +346,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListBucketResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: List all buckets.
@@ -263,6 +373,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileUploadResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -279,6 +399,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -297,6 +427,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Session'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -322,6 +462,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -345,6 +495,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -371,6 +531,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -401,6 +571,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EmbeddingsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -421,6 +601,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -445,6 +635,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentStepResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -478,6 +678,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Turn'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -508,6 +718,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Benchmark'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -528,6 +748,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Dataset'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -541,6 +771,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -561,6 +801,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Model'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -574,6 +824,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -594,6 +854,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/ScoringFn'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -614,6 +884,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Shield'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -632,6 +912,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Span'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -655,6 +945,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QuerySpanTreeResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -679,6 +979,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Tool'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: ''
@@ -697,6 +1007,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ToolGroup'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: ''
@@ -710,6 +1030,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: Unregister a tool group
@@ -728,6 +1058,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Trace'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -748,6 +1088,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -768,6 +1118,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/PostTrainingJobStatusResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -786,6 +1146,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPostTrainingJobsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -801,6 +1171,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/FileUploadResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -822,6 +1202,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/FileResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -852,6 +1242,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/VectorDB'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -865,6 +1265,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -883,6 +1293,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HealthInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -892,6 +1312,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: >-
@@ -908,6 +1338,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorIO
       description: ''
@@ -927,6 +1367,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ToolInvocationResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: Run a tool with the given arguments
@@ -948,6 +1398,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/JobStatus'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -966,6 +1426,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -989,6 +1459,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -1012,6 +1492,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListBenchmarksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -1020,6 +1510,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -1039,6 +1539,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListDatasetsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -1047,6 +1557,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -1066,6 +1586,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListFileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: List all files in a bucket.
@@ -1085,6 +1615,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListModelsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -1097,6 +1637,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Model'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -1116,6 +1666,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListProvidersResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1129,6 +1689,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListRoutesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1142,6 +1712,16 @@ paths:
             application/jsonl:
               schema:
                 $ref: '#/components/schemas/ToolDef'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: ''
@@ -1165,6 +1745,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListScoringFunctionsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -1173,6 +1763,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -1192,6 +1792,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListShieldsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -1204,6 +1814,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Shield'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -1223,6 +1843,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListToolGroupsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: List tool groups with optional provider
@@ -1231,6 +1861,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: Register a tool group
@@ -1250,6 +1890,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListToolsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: List tools with optional tool group
@@ -1268,6 +1918,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListVectorDBsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -1280,6 +1940,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorDB'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -1295,6 +1965,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1314,6 +1994,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -1333,6 +2023,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/RAGQueryResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: >-
@@ -1353,6 +2053,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QueryChunksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorIO
       description: ''
@@ -1372,6 +2082,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QuerySpansResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1391,6 +2111,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QueryTracesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1415,6 +2145,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: >-
@@ -1457,6 +2197,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -1481,6 +2231,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/RunShieldResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Safety
       description: ''
@@ -1496,6 +2256,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1515,6 +2285,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Scoring
       description: ''
@@ -1534,6 +2314,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreBatchResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Scoring
       description: ''
@@ -1553,6 +2343,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -1572,6 +2372,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/SyntheticDataGenerationResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - SyntheticDataGeneration (Coming Soon)
       description: ''
@@ -1591,6 +2401,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/VersionInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1599,6 +2419,34 @@ jsonSchemaDialect: >-
   https://json-schema.org/draft/2020-12/schema
 components:
   schemas:
+    Error:
+      type: object
+      properties:
+        status:
+          type: integer
+          description: HTTP status code
+        title:
+          type: string
+          description: >-
+            Error title, a short summary of the error which is invariant for an error
+            type
+        detail:
+          type: string
+          description: >-
+            Error detail, a longer human-readable description of the error
+        instance:
+          type: string
+          description: >-
+            (Optional) A URL which can be used to retrieve more information about
+            the specific occurrence of the error
+      additionalProperties: false
+      required:
+        - status
+        - title
+        - detail
+      title: Error
+      description: >-
+        Error response from the API. Roughly follows RFC 7807.
     AppendRowsRequest:
       type: object
       properties:
@@ -5626,7 +6474,51 @@ components:
       required:
         - version
       title: VersionInfo
-  responses: {}
+  responses:
+    BadRequest400:
+      description: The request was invalid or malformed
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 400
+            title: Bad Request
+            detail: The request was invalid or malformed
+    TooManyRequests429:
+      description: >-
+        The client has sent too many requests in a given amount of time
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 429
+            title: Too Many Requests
+            detail: >-
+              You have exceeded the rate limit. Please try again later.
+    InternalServerError500:
+      description: >-
+        The server encountered an unexpected error
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 500
+            title: Internal Server Error
+            detail: >-
+              An unexpected error occurred. Our team has been notified.
+    DefaultError:
+      description: An unexpected error occurred
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 0
+            title: Error
+            detail: An unexpected error occurred
 security:
   - Default: []
 tags:
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index dcbee7d2f..a2553f905 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -55,6 +55,7 @@ def main(output_dir: str):
                 a set of endpoints and their corresponding interfaces that are tailored to
                 best leverage Llama Models.""",
             ),
+            include_standard_error_responses=True,
         ),
     )
 
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 4220cfc05..91f32e6c8 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -10,6 +10,7 @@ import typing
 from dataclasses import make_dataclass
 from typing import Any, Dict, Set, Union
 
+from llama_stack.apis.datatypes import Error
 from llama_stack.strong_typing.core import JsonType
 from llama_stack.strong_typing.docstring import Docstring, parse_type
 from llama_stack.strong_typing.inspection import (
@@ -434,6 +435,75 @@ class Generator:
         )
         self.schema_builder = SchemaBuilder(schema_generator)
         self.responses = {}
+        
+        # Create standard error responses
+        self._create_standard_error_responses()
+
+    def _create_standard_error_responses(self) -> None:
+        """
+        Creates standard error responses that can be reused across operations.
+        These will be added to the components.responses section of the OpenAPI document.
+        """
+        # Get the Error schema
+        error_schema = self.schema_builder.classdef_to_ref(Error)
+        
+        # Create standard error responses
+        self.responses["BadRequest400"] = Response(
+            description="The request was invalid or malformed",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 400,
+                        "title": "Bad Request",
+                        "detail": "The request was invalid or malformed",
+                    }
+                )
+            }
+        )
+        
+        self.responses["TooManyRequests429"] = Response(
+            description="The client has sent too many requests in a given amount of time",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 429,
+                        "title": "Too Many Requests",
+                        "detail": "You have exceeded the rate limit. Please try again later.",
+                    }
+                )
+            }
+        )
+        
+        self.responses["InternalServerError500"] = Response(
+            description="The server encountered an unexpected error",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 500,
+                        "title": "Internal Server Error",
+                        "detail": "An unexpected error occurred. Our team has been notified.",
+                    }
+                )
+            }
+        )
+        
+        # Add a default error response for any unhandled error cases
+        self.responses["DefaultError"] = Response(
+            description="An unexpected error occurred",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 0,
+                        "title": "Error",
+                        "detail": "An unexpected error occurred",
+                    }
+                )
+            }
+        )
 
     def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
         # Don't include schema definition in the tag description because for one,
@@ -649,6 +719,18 @@ class Generator:
             responses.update(response_builder.build_response(response_options))
 
         assert len(responses.keys()) > 0, f"No responses found for {op.name}"
+        
+        # Add standard error response references
+        if self.options.include_standard_error_responses:
+            if "400" not in responses:
+                responses["400"] = ResponseRef("BadRequest400")
+            if "429" not in responses:
+                responses["429"] = ResponseRef("TooManyRequests429")
+            if "500" not in responses:
+                responses["500"] = ResponseRef("InternalServerError500")
+            if "default" not in responses:
+                responses["default"] = ResponseRef("DefaultError")
+        
         if op.event_type is not None:
             builder = ContentBuilder(self.schema_builder)
             callbacks = {
diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py
index f80da453b..edc861ad5 100644
--- a/docs/openapi_generator/pyopenapi/options.py
+++ b/docs/openapi_generator/pyopenapi/options.py
@@ -35,6 +35,7 @@ class Options:
     :param error_wrapper: True if errors are encapsulated in an error object wrapper.
     :param property_description_fun: Custom transformation function to apply to class property documentation strings.
     :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types.
+    :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, and a default fallback) in all operations.
     """
 
     server: Server
@@ -52,6 +53,7 @@ class Options:
     error_wrapper: bool = False
     property_description_fun: Optional[Callable[[type, str, str], str]] = None
     captions: Optional[Dict[str, str]] = None
+    include_standard_error_responses: bool = True
 
     default_captions: ClassVar[Dict[str, str]] = {
         "Operations": "Operations",
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
index 6df93052c..842a2b63d 100644
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -5,6 +5,9 @@
 # the root directory of this source tree.
 
 from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel
 
 from llama_stack.schema_utils import json_schema_type
 
@@ -33,3 +36,20 @@ class Api(Enum):
 
     # built-in API
     inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+
+    :param status: HTTP status code
+    :param title: Error title, a short summary of the error which is invariant for an error type
+    :param detail: Error detail, a longer human-readable description of the error
+    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+    """
+
+    status: int
+    title: str
+    detail: str
+    instance: Optional[str] = None

From 15f69e75ffaf07c79edf1cdcef1c31d0b67bbc3d Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Fri, 28 Feb 2025 11:25:23 -0800
Subject: [PATCH 10/13] fix: replace eval with json decoding for format_adapter
 (#1328)

# What does this PR do?
- Using `eval` to parse dataset columns is a security risk; decode them with `json.loads` instead.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

- see https://github.com/meta-llama/llama-stack/pull/1327

cc @SLR722 we will need to update the corresponding dataset via

```python
def update_to_json_str():
    dataset = datasets.load_dataset(...)
    processed_dataset = dataset[split].map(
        lambda x: {
            "column": json.dumps(eval(x["column"]))
        }
    )
    processed_dataset.push_to_hub(...)
```
[//]: # (## Documentation)
---
 .../post_training/torchtune/datasets/format_adapter.py   | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
index 884977803..6b607f1c7 100644
--- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
+++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
@@ -10,16 +10,19 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import json
 from typing import Any, Mapping
 
 from llama_stack.providers.utils.common.data_schema_validator import ColumnName
 
 
-def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]:
+def llama_stack_instruct_to_torchtune_instruct(
+    sample: Mapping[str, Any],
+) -> Mapping[str, Any]:
     assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, (
         "Invalid input row"
     )
-    input_messages = eval(str(sample[ColumnName.chat_completion_input.value]))
+    input_messages = json.loads(sample[ColumnName.chat_completion_input.value])
 
     assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message"
     input_message = input_messages[0]
@@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map
 def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]:
     assert ColumnName.dialog.value in sample, "Invalid input row"
     role_map = {"user": "human", "assistant": "gpt"}
-    dialog = eval(str(sample[ColumnName.dialog.value]))
+    dialog = json.loads(sample[ColumnName.dialog.value])
 
     assert len(dialog) > 1, "dialog must have at least 2 messagse"
     roles = []

From 82fa0803faee41ae0e74a5e97066cdb78bfee294 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Fri, 28 Feb 2025 12:29:50 -0800
Subject: [PATCH 11/13] chore: refactor client tool in test (#1331)

# What does this PR do?

Use the `@client_tool` decorator instead of subclassing `ClientTool` in the agents test.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/client-sdk/agents/test_agents.py --inference-model "meta-llama/Llama-3.3-70B-Instruct"
```

<img width="1053" alt="image"
src="https://github.com/user-attachments/assets/d3ade884-ef42-494e-8028-3b09d9ef1978"
/>


[//]: # (## Documentation)
---
 tests/client-sdk/agents/test_agents.py | 82 ++++++--------------------
 1 file changed, 18 insertions(+), 64 deletions(-)

diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 8f68699b2..9690a8139 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -4,20 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import json
-from typing import Dict, List
 from uuid import uuid4
 
 import pytest
 from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.client_tool import ClientTool
+from llama_stack_client.lib.agents.client_tool import client_tool
 from llama_stack_client.lib.agents.event_logger import EventLogger
-from llama_stack_client.types import ToolResponseMessage
 from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument
 from llama_stack_client.types.memory_insert_params import Document
-from llama_stack_client.types.shared.completion_message import CompletionMessage
 from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
-from llama_stack_client.types.tool_def_param import Parameter
 
 from llama_stack.apis.agents.agents import (
     AgentConfig as Server__AgentConfig,
@@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import (
 )
 
 
-class TestClientTool(ClientTool):
-    """Tool to give boiling point of a liquid
-    Returns the correct value for polyjuice in Celcius and Fahrenheit
-    and returns -1 for other liquids
+@client_tool
+def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
     """
+    Returns the boiling point of a liquid in Celcius or Fahrenheit
 
-    def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
-        assert len(messages) == 1, "Expected single message"
-
-        message = messages[0]
-
-        tool_call = message.tool_calls[0]
-
-        try:
-            response = self.run_impl(**tool_call.arguments)
-            response_str = json.dumps(response, ensure_ascii=False)
-        except Exception as e:
-            response_str = f"Error when running tool: {e}"
-
-        message = ToolResponseMessage(
-            role="tool",
-            call_id=tool_call.call_id,
-            tool_name=tool_call.tool_name,
-            content=response_str,
-        )
-        return message
-
-    def get_name(self) -> str:
-        return "get_boiling_point"
-
-    def get_description(self) -> str:
-        return "Get the boiling point of imaginary liquids (eg. polyjuice)"
-
-    def get_params_definition(self) -> Dict[str, Parameter]:
-        return {
-            "liquid_name": Parameter(
-                name="liquid_name",
-                parameter_type="string",
-                description="The name of the liquid",
-                required=True,
-            ),
-            "celcius": Parameter(
-                name="celcius",
-                parameter_type="boolean",
-                description="Whether to return the boiling point in Celcius",
-                required=False,
-            ),
-        }
-
-    def run_impl(self, liquid_name: str, celcius: bool = True) -> int:
-        if liquid_name.lower() == "polyjuice":
-            if celcius:
-                return -100
-            else:
-                return -212
+    :param liquid_name: The name of the liquid
+    :param celcius: Whether to return the boiling point in Celcius
+    :return: The boiling point of the liquid in Celcius or Fahrenheit
+    """
+    if liquid_name.lower() == "polyjuice":
+        if celcius:
+            return -100
         else:
-            return -1
+            return -212
+    else:
+        return -1
 
 
 @pytest.fixture(scope="session")
@@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config):
 
 
 def test_custom_tool(llama_stack_client, agent_config):
-    client_tool = TestClientTool()
+    client_tool = get_boiling_point
     agent_config = {
         **agent_config,
         "toolgroups": ["builtin::websearch"],
@@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config):
 
 def test_tool_choice(llama_stack_client, agent_config):
     def run_agent(tool_choice):
-        client_tool = TestClientTool()
+        client_tool = get_boiling_point
 
         test_agent_config = {
             **agent_config,
@@ -362,7 +316,7 @@ def test_tool_choice(llama_stack_client, agent_config):
 
 # TODO: fix this flaky test
 def xtest_override_system_message_behavior(llama_stack_client, agent_config):
-    client_tool = TestClientTool()
+    client_tool = get_boiling_point
     agent_config = {
         **agent_config,
         "instructions": "You are a pirate",
@@ -586,7 +540,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config):
 
 
 def test_create_turn_response(llama_stack_client, agent_config):
-    client_tool = TestClientTool()
+    client_tool = get_boiling_point
     agent_config = {
         **agent_config,
         "input_shields": [],

From 9b6a2577b1ced370b314d32a8b97093c0f6b4c7e Mon Sep 17 00:00:00 2001
From: Surya Prakash Pathak <supathak@redhat.com>
Date: Fri, 28 Feb 2025 21:37:03 +0000
Subject: [PATCH 12/13] docs: Update llama-stack version in README.md (#1330)

# What does this PR do?
This PR updates the version in the
[README.md](https://github.com/meta-llama/llama-stack/blob/main/docs/zero_to_hero_guide/README.md)
to reflect the latest changes in Llama Stack setup.

Previously, using **llama-stack==0.1.0** caused an error when running:
```bash
llama stack build --template ollama --image-type conda
```
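Upgrading to llama-stack==0.1.3 resolves this issue, so the guide now installs the
latest release with `pip install -U llama-stack` instead of pinning a version.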

## Test Plan
- Verified that `llama stack build --template ollama --image-type conda`
works correctly.

---------

Signed-off-by: Surya Prakash Pathak <supathak@redhat.com>
---
 docs/zero_to_hero_guide/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md
index 7dfd8e5fa..98f40bc3c 100644
--- a/docs/zero_to_hero_guide/README.md
+++ b/docs/zero_to_hero_guide/README.md
@@ -73,7 +73,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
    Open a new terminal and install `llama-stack`:
    ```bash
    conda activate ollama
-   pip install llama-stack==0.1.0
+   pip install -U llama-stack
    ```
 
 ---

From 7ad7e3b970a2ea143a3e4fc4e5befed1d6bc7d67 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Fri, 28 Feb 2025 16:12:05 -0800
Subject: [PATCH 13/13] fix: only install llama-stack package, deps are now
 correctly incorporated

---
 llama_stack/distribution/build_container.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 68f8a0863..04d115f70 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -159,12 +159,12 @@ EOF
     add_to_container << EOF
 RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
   --index-strategy unsafe-best-match \
-  llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION
+  llama-stack==$TEST_PYPI_VERSION
 
 EOF
   else
     if [ -n "$PYPI_VERSION" ]; then
-      SPEC_VERSION="llama-stack==${PYPI_VERSION} llama-models==${PYPI_VERSION} llama-stack-client==${PYPI_VERSION}"
+      SPEC_VERSION="llama-stack==${PYPI_VERSION}"
     else
       SPEC_VERSION="llama-stack"
     fi