From 18ab1985da2cb461772bd9a4501a7803555eaa1f Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 28 Feb 2025 12:48:49 -0500 Subject: [PATCH 01/13] fix: Make remote::vllm compatible with vLLM <= v0.6.3 (#1325) # What does this PR do? This is to be consistent with OpenAI API and support vLLM <= v0.6.3 References: * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice * https://github.com/vllm-project/vllm/pull/10000 This fixes the error when running older versions of vLLM: ``` 00:50:19.834 [START] /v1/inference/chat-completion INFO 2025-02-28 00:50:20,203 httpx:1025: HTTP Request: POST https://api-xeai-granite-3-1-8b-instruct.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/chat/completions "HTTP/1.1 400 Bad Request" Traceback (most recent call last): File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 235, in endpoint return await maybe_await(value) File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 201, in maybe_await return await value File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper result = await method(self, *args, **kwargs) File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/routers/routers.py", line 193, in chat_completion return await provider.chat_completion(**params) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper result = await method(self, *args, **kwargs) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 286, in chat_completion return await self._nonstream_chat_completion(request, self.client) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 292, in _nonstream_chat_completion r = client.chat.completions.create(**params) File 
"/usr/local/lib/python3.10/site-packages/openai/_utils/_utils.py", line 279, in wrapper return func(*args, **kwargs) File "/usr/local/lib/python3.10/site-packages/openai/resources/chat/completions/completions.py", line 879, in create return self._post( File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1290, in post return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)) File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 967, in request return self._request( File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1071, in _request raise self._make_status_error_from_response(err.response) from None openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'value_error', 'loc': ('body',), 'msg': 'Value error, When using `tool_choice`, `tools` must be set.', 'input': {'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What model are you?'}]}], 'model': 'granite-3-1-8b-instruct', 'max_tokens': 4096, 'stream': False, 'temperature': 0.0, 'tools': None, 'tool_choice': 'auto'}, 'ctx': {'error': ValueError('When using `tool_choice`, `tools` must be set.')}}]", 'type': 'BadRequestError', 'param': None, 'code': 400} INFO: 2600:1700:9d20:ac0::49:59736 - "POST /v1/inference/chat-completion HTTP/1.1" 500 Internal Server Error 00:50:20.266 [END] /v1/inference/chat-completion [StatusCode.OK] (431.99ms) ``` ## Test Plan All existing tests pass. 
--------- Signed-off-by: Yuan Tang --- llama_stack/providers/remote/inference/vllm/vllm.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 967a3e44d..8ec23cd90 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): tool_config: Optional[ToolConfig] = None, ) -> AsyncGenerator: model = await self.model_store.get_model(model_id) + # This is to be consistent with OpenAI API and support vLLM <= v0.6.3 + # References: + # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # * https://github.com/vllm-project/vllm/pull/10000 + if not tools and tool_config is not None: + tool_config.tool_choice = ToolChoice.none request = ChatCompletionRequest( model=model.provider_resource_id, messages=messages, From c91548fe07ca7ec0fa33cf82443e165594abda9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 28 Feb 2025 19:01:52 +0100 Subject: [PATCH 02/13] build(container): misc improvements (#1291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? See individual commit messages. 
[//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Apply this diff: ``` diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index da33b8d5..4a702f6f 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -28,5 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime - - remote::model-context-protocol + container_image: "registry.access.redhat.com/ubi9" image_type: conda ``` Then run: ``` CONTAINER_BINARY=podman llama stack build --template ollama --image-type container --image-name registry.access.redhat.com/ubi9 Containerfile created successfully in /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile FROM registry.access.redhat.com/ubi9 WORKDIR /app RUN dnf -y update && dnf install -y iputils net-tools wget vim-minimal python3.11 python3.11-pip python3.11-wheel python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn RUN uv pip install --no-cache llama-stack RUN pip uninstall -y uv ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"] # Allows running as non-root user RUN mkdir -p /.llama /.cache RUN chmod -R g+rw /app /.llama /.cache PWD: /Users/leseb/Documents/AI/llama-stack Containerfile: /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile + podman build --platform linux/arm64 -t distribution-ollama:0.1.4 -f 
/var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile . --progress=plain STEP 1/11: FROM registry.access.redhat.com/ubi9 STEP 2/11: WORKDIR /app --> Using cache d73dafd4caddd75bc29242a9031258fea759dc571c5bb53a64b5e6d86b3b1335 --> d73dafd4cadd STEP 3/11: RUN dnf -y update && dnf install -y iputils net-tools wget vim-minimal python3.11 python3.11-pip python3.11-wheel python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all --> Using cache b74ad682db149771612a3ea1e4796e0760ab8a4e07c26ad672b46a86d38178c2 --> b74ad682db14 STEP 4/11: ENV UV_SYSTEM_PYTHON=1 --> Using cache 0812a05e6576506aa2fe646cbf239d0cb504cac30a50cb5cf4dc88e49039466d --> 0812a05e6576 STEP 5/11: RUN pip install uv --> Using cache a0ce1705f87e52f70f6eb34e66f67b68ebc7c1a073f4d2a664b189cfa89a4e88 --> a0ce1705f87e STEP 6/11: RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn Using Python 3.11.9 environment at: /usr Resolved 107 packages in 1.78s Downloading kiwisolver (1.4MiB) Downloading aiohttp (1.6MiB) Downloading grpcio (5.4MiB) Downloading nltk (1.4MiB) Downloading transformers (9.5MiB) Downloading pydantic-core (1.7MiB) Downloading lxml (4.6MiB) Downloading psycopg2-binary (2.7MiB) Downloading scipy (33.8MiB) Downloading scikit-learn (12.0MiB) Downloading tokenizers (2.8MiB) Downloading fonttools (4.6MiB) Downloading pymongo (1.3MiB) Downloading rapidfuzz (1.4MiB) Downloading sentencepiece (1.2MiB) Downloading pyarrow (38.7MiB) Downloading matplotlib (8.1MiB) Downloading pycryptodomex (2.1MiB) Downloading pillow (4.2MiB) Downloading pandas (14.9MiB) Downloading numpy (13.6MiB) Building fire==0.7.0 Downloaded sentencepiece Downloaded 
kiwisolver Downloaded pymongo Downloaded rapidfuzz Downloaded nltk Downloaded aiohttp Built fire==0.7.0 Downloaded pydantic-core Downloaded pycryptodomex Downloaded psycopg2-binary Downloaded tokenizers Downloaded pillow Downloaded lxml Downloaded fonttools Downloaded grpcio Downloaded matplotlib Downloaded transformers Downloaded scikit-learn Downloaded numpy Downloaded pandas Downloaded scipy Downloaded pyarrow Prepared 107 packages in 3.03s Installed 107 packages in 62ms + aiohappyeyeballs==2.4.6 + aiohttp==3.11.13 + aiosignal==1.3.2 + aiosqlite==0.21.0 + annotated-types==0.7.0 + anyio==4.8.0 + attrs==25.1.0 + autoevals==0.0.120 + backoff==2.2.1 + blobfile==3.0.0 + braintrust-core==0.0.58 + certifi==2025.1.31 + chardet==5.2.0 + charset-normalizer==3.4.1 + chevron==0.14.0 + chromadb-client==0.6.3 + click==8.1.8 + contourpy==1.3.1 + cycler==0.12.1 + datasets==3.3.2 + deprecated==1.2.18 + dill==0.3.8 + distro==1.9.0 + dnspython==2.7.0 + fastapi==0.115.8 + filelock==3.17.0 + fire==0.7.0 + fonttools==4.56.0 + frozenlist==1.5.0 + fsspec==2024.12.0 + googleapis-common-protos==1.68.0 + grpcio==1.70.0 + h11==0.14.0 + httpcore==1.0.7 + httpx==0.28.1 + huggingface-hub==0.29.1 + idna==3.10 + importlib-metadata==8.5.0 + jiter==0.8.2 + joblib==1.4.2 + jsonschema==4.23.0 + jsonschema-specifications==2024.10.1 + kiwisolver==1.4.8 + levenshtein==0.26.1 + lxml==5.3.1 + matplotlib==3.10.0 + monotonic==1.6 + multidict==6.1.0 + multiprocess==0.70.16 + nltk==3.9.1 + numpy==1.26.4 + ollama==0.4.7 + openai==1.64.0 + opentelemetry-api==1.30.0 + opentelemetry-exporter-otlp-proto-common==1.30.0 + opentelemetry-exporter-otlp-proto-grpc==1.30.0 + opentelemetry-exporter-otlp-proto-http==1.30.0 + opentelemetry-proto==1.30.0 + opentelemetry-sdk==1.30.0 + opentelemetry-semantic-conventions==0.51b0 + orjson==3.10.15 + overrides==7.7.0 + packaging==24.2 + pandas==2.2.3 + pillow==11.1.0 + posthog==3.16.0 + propcache==0.3.0 + protobuf==5.29.3 + psycopg2-binary==2.9.10 + pyarrow==19.0.1 + 
pycryptodomex==3.21.0 + pydantic==2.10.6 + pydantic-core==2.27.2 + pymongo==4.11.1 + pyparsing==3.2.1 + pypdf==5.3.0 + python-dateutil==2.9.0.post0 + pytz==2025.1 + pyyaml==6.0.2 + rapidfuzz==3.12.1 + redis==5.2.1 + referencing==0.36.2 + regex==2024.11.6 + requests==2.32.3 + rpds-py==0.23.1 + safetensors==0.5.3 + scikit-learn==1.6.1 + scipy==1.15.2 + sentencepiece==0.2.0 + six==1.17.0 + sniffio==1.3.1 + sqlite-vec==0.1.6 + starlette==0.45.3 + tenacity==9.0.0 + termcolor==2.5.0 + threadpoolctl==3.5.0 + tokenizers==0.21.0 + tqdm==4.67.1 + transformers==4.49.0 + typing-extensions==4.12.2 + tzdata==2025.1 + urllib3==2.3.0 + uvicorn==0.34.0 + wrapt==1.17.2 + xxhash==3.5.0 + yarl==1.18.3 + zipp==3.21.0 --> 5b5b823605a1 STEP 7/11: RUN uv pip install --no-cache llama-stack Using Python 3.11.9 environment at: /usr Resolved 55 packages in 1.08s Downloading setuptools (1.2MiB) Downloading pygments (1.2MiB) Downloading llama-models (1.5MiB) Downloading tiktoken (1.1MiB) Downloaded tiktoken Downloaded llama-models Downloaded pygments Downloaded setuptools Prepared 15 packages in 402ms Installed 15 packages in 15ms + jinja2==3.1.5 + llama-models==0.1.4 + llama-stack==0.1.4 + llama-stack-client==0.1.4 + markdown-it-py==3.0.0 + markupsafe==3.0.2 + mdurl==0.1.2 + prompt-toolkit==3.0.50 + pyaml==25.1.0 + pygments==2.19.1 + python-dotenv==1.0.1 + rich==13.9.4 + setuptools==75.8.2 + tiktoken==0.9.0 + wcwidth==0.2.13 --> 38a037443807 STEP 8/11: RUN pip uninstall -y uv Found existing installation: uv 0.6.3 Uninstalling uv-0.6.3: Successfully uninstalled uv-0.6.3 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv --> 54f749dc5ece STEP 9/11: ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"] --> 481c138b1982 STEP 10/11: RUN mkdir -p /.llama /.cache --> 0fc174f014a8 STEP 11/11: RUN chmod -R g+rw /app /.llama /.cache COMMIT distribution-ollama:0.1.4 --> d41b4ab4b136 Successfully tagged localhost/distribution-ollama:0.1.4 d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3 + set +x Success! Build Successful! ``` UBI9 container successfully builds. Run the container: ``` podman run d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3 --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:213: Resolved 30 providers INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-inference => ollama INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: models => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inference => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-vector_io => sqlite-vec INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-safety => llama-guard INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: shields => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: safety => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: vector_dbs => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: vector_io => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-tool_runtime => brave-search INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-tool_runtime => tavily-search ``` [//]: # (## Documentation) --------- Signed-off-by: Sébastien Han --- 
llama_stack/distribution/build.py | 3 - llama_stack/distribution/build_container.sh | 65 +++++++++++++-------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 2b43b8128..3d808a4a4 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -15,7 +15,6 @@ from termcolor import cprint from llama_stack.distribution.datatypes import BuildConfig, Provider from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.utils.exec import run_command, run_with_pty from llama_stack.distribution.utils.image_types import ImageType from llama_stack.providers.datatypes import Api @@ -103,8 +102,6 @@ def build_image( template_or_config, image_name, container_base, - str(build_file_path), - str(BUILDS_BASE_DIR / ImageType.container.value), " ".join(normal_deps), ] elif build_config.image_type == ImageType.conda.value: diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 08941a538..9b584a85c 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
@@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} # mounting is not supported by docker buildx, so we use COPY instead USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} -if [ "$#" -lt 6 ]; then +if [ "$#" -lt 4 ]; then # This only works for templates - echo "Usage: $0 []" >&2 + echo "Usage: $0 []" >&2 exit 1 fi set -euo pipefail template_or_config="$1" -image_name="$2" -container_base="$3" -build_file_path="$4" -host_build_dir="$5" -pip_dependencies="$6" -special_pip_deps="${7:-}" +shift +image_name="$1" +shift +container_base="$1" +shift +pip_dependencies="$1" +shift +special_pip_deps="${1:-}" # Define color codes RED='\033[0;31m' -GREEN='\033[0;32m' NC='\033[0m' # No Color CONTAINER_BINARY=${CONTAINER_BINARY:-docker} @@ -48,7 +49,6 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) add_to_container() { - local input output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then printf '%s\n' "$1" >>"$output_file" @@ -64,9 +64,9 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then FROM $container_base WORKDIR /app -RUN microdnf -y update && microdnf install -y iputils net-tools wget \ +RUN dnf -y update && dnf install -y iputils net-tools wget \ vim-minimal python3.11 python3.11-pip python3.11-wheel \ - python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all + python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv @@ -165,6 +165,11 @@ EOF fi fi +# remove uv after installation + add_to_container << EOF +RUN pip uninstall -y uv +EOF + # if template_or_config ends with .yaml, it is not a template and we should not use the --template flag if [[ "$template_or_config" != *.yaml ]]; then add_to_container << EOF @@ -185,26 +190,31 @@ RUN mkdir -p /.llama /.cache RUN chmod -R g+rw /app /.llama /.cache EOF -printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n" -cat 
$TEMP_DIR/Containerfile +printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" +cat "$TEMP_DIR"/Containerfile printf "\n" -mounts="" +# Start building the CLI arguments +CLI_ARGS=() + +# Read CONTAINER_OPTS and put it in an array +read -ra CLI_ARGS <<< "$CONTAINER_OPTS" + if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then if [ -n "$LLAMA_STACK_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") fi if [ -n "$LLAMA_MODELS_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount") fi if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") fi fi if command -v selinuxenabled &>/dev/null && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" + CLI_ARGS+=("--security-opt" "label=disable") fi # Set version tag based on PyPI version @@ -225,11 +235,11 @@ image_tag="$image_name:$version_tag" # Detect platform architecture ARCH=$(uname -m) if [ -n "$BUILD_PLATFORM" ]; then - PLATFORM="--platform $BUILD_PLATFORM" + CLI_ARGS+=("--platform $BUILD_PLATFORM") elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - PLATFORM="--platform linux/arm64" + CLI_ARGS+=("--platform" "linux/arm64") elif [ "$ARCH" = "x86_64" ]; then - PLATFORM="--platform linux/amd64" + CLI_ARGS+=("--platform" "linux/amd64") else echo "Unsupported architecture: $ARCH" exit 1 @@ -238,8 +248,13 @@ fi echo "PWD: $(pwd)" echo "Containerfile: $TEMP_DIR/Containerfile" set -x -$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \ - -f "$TEMP_DIR/Containerfile" "." 
$mounts --progress=plain + +$CONTAINER_BINARY build \ + "${CLI_ARGS[@]}" \ + -t "$image_tag" \ + -f "$TEMP_DIR/Containerfile" \ + "." \ + --progress=plain # clean up tmp/configs set +x From 83dc8fbdffdc673dcdd6392ea9f1138fd0a9f412 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Fri, 28 Feb 2025 12:02:36 -0600 Subject: [PATCH 03/13] test: cleanup embedding model test suite (#1322) # What does this PR do? - skip media tests for models that do not support media - skip output_dimension tests for models that do not support it - skip task_type tests for models that do not support it - provide task_type for models that require it ## Test Plan `LLAMA_STACK_BASE_URL=http://localhost:8321 pytest -v tests/client-sdk/inference/test_embedding.py --embedding-model ...` --- tests/client-sdk/inference/test_embedding.py | 65 ++++++++++++++++---- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 69d35d05d..075f927f7 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -76,6 +76,25 @@ DUMMY_IMAGE_URL = ImageContentItem( ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") SUPPORTED_PROVIDERS = {"remote::nvidia"} +MODELS_SUPPORTING_MEDIA = {} +MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"} +MODELS_REQUIRING_TASK_TYPE = { + "nvidia/llama-3.2-nv-embedqa-1b-v2", + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", +} +MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE + + +def default_task_type(model_id): + """ + Some models require a task type parameter. This provides a default value for + testing those models. 
+ """ + if model_id in MODELS_REQUIRING_TASK_TYPE: + return {"task_type": "query"} + return {} @pytest.mark.parametrize( @@ -92,7 +111,9 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"} def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -110,11 +131,14 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents, infere "list[url,string,base64,text]", ], ) -@pytest.mark.xfail(reason="Media is not supported") def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) + if embedding_model_id not in MODELS_SUPPORTING_MEDIA: + pytest.xfail(f"{embedding_model_id} doesn't support media") + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -145,7 +169,10 @@ def test_embedding_truncation( if inference_provider_type not in SUPPORTED_PROVIDERS: 
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=contents, text_truncation=text_truncation + model_id=embedding_model_id, + contents=contents, + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -178,26 +205,36 @@ def test_embedding_truncation_error( pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_LONG_TEXT], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) -@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) + if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION: + pytest.xfail(f"{embedding_model_id} doesn't support output_dimension") + base_response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id) + ) test_response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 + model_id=embedding_model_id, + contents=[DUMMY_STRING], + **default_task_type(embedding_model_id), + output_dimension=32, ) assert len(base_response.embeddings[0]) != len(test_response.embeddings[0]) assert len(test_response.embeddings[0]) == 32 
-@pytest.mark.xfail(reason="Only valid for model supporting task type") def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE: + pytest.xfail(f"{embedding_model_id} doesn't support task_type") query_embedding = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" ) @@ -220,7 +257,10 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_ if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -245,5 +285,8 @@ def test_embedding_text_truncation_error( pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) From 5366dab31e3dfecab455a9a6c5f55cc18c7c7ae6 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:03:45 +0800 Subject: [PATCH 04/13] docs: update build doc (#1262) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] 
https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/run.py#L22 https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/_build.py#L103 [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- docs/source/distributions/building_distro.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 9cb1a402f..20a835201 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co llama stack build > Enter a name for your Llama Stack (e.g. my-local-stack): my-stack -> Enter the image type you want your Llama Stack to be built as (container or conda): conda +> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda Llama Stack is composed of several APIs working together. Let's select the provider types (implementations) you want to use for these APIs. @@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i [--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}] config -start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. +Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. 
positional arguments: config Path to config file to use for the run From ea4f13cc209e1222aadfab52224a48f687a6d483 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:07:24 +0800 Subject: [PATCH 05/13] chore: add container cmd check (#1306) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/distribution/build_container.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 9b584a85c..68f8a0863 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -48,6 +48,9 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "$SCRIPT_DIR/common.sh" + add_to_container() { output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then @@ -58,6 +61,12 @@ add_to_container() { fi } +# Check if container command is available +if ! is_command_available $CONTAINER_BINARY; then + printf "${RED}Error: ${CONTAINER_BINARY} command not found. 
Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2 + exit 1 +fi + # Update and install UBI9 components if UBI9 base image is used if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then add_to_container << EOF @@ -212,7 +221,7 @@ if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then fi fi -if command -v selinuxenabled &>/dev/null && selinuxenabled; then +if is_command_available selinuxenabled && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir CLI_ARGS+=("--security-opt" "label=disable") fi From 14c442f177591ab336414f0017d4da3d1e20a088 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:08:05 +0800 Subject: [PATCH 06/13] chore: update cmd check (#1293) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/distribution/build_conda_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh index 31b3e1b21..1eac2ee08 100755 --- a/llama_stack/distribution/build_conda_env.sh +++ b/llama_stack/distribution/build_conda_env.sh @@ -52,7 +52,7 @@ ensure_conda_env_python310() { local python_version="3.10" # Check if conda command is available - if ! command -v conda &>/dev/null; then + if ! is_command_available conda; then printf "${RED}Error: conda command not found. 
Is Conda installed and in your PATH?${NC}" >&2 exit 1 fi From 66cd128ab51aff0b649c8ae59d7ec139a54913c1 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:10:12 +0800 Subject: [PATCH 07/13] docs: update the downloaded list doc (#1266) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] The `--downloaded` option has been released, so this PR updates the related documents. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- .../self_hosted_distro/meta-reference-gpu.md | 27 +++++++++++++--- .../meta-reference-quantized-gpu.md | 27 +++++++++++++--- .../llama_cli_reference/download_models.md | 32 +++++++++++++++++++ .../references/llama_cli_reference/index.md | 32 +++++++++++++++++++ .../meta-reference-gpu/doc_template.md | 27 +++++++++++++--- .../doc_template.md | 27 +++++++++++++--- 6 files changed, 156 insertions(+), 16 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b183757db..b8d1b1714 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. 
Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 
11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 9aeb7a88b..a49175e22 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md index 6c791bcb7..ca470f8c2 100644 --- a/docs/source/references/llama_cli_reference/download_models.md +++ 
b/docs/source/references/llama_cli_reference/download_models.md @@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern **Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. + +## List the downloaded models + +To list the downloaded models with the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ 
+├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md index a43666963..8a38fc3ae 100644 --- a/docs/source/references/llama_cli_reference/index.md +++ b/docs/source/references/llama_cli_reference/index.md @@ -154,6 +154,38 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. 
+## List the downloaded models + +To list the downloaded models with the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` + ## Understand the models The `llama model` command helps you explore the model’s interface. 
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index 60556a6f3..87438fb6d 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -29,12 +29,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md index 2b117120c..e8dfaaf3c 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md +++ 
b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md @@ -31,12 +31,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 
2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution From 6520baebed13c1cbf4227f84d0dcd6e77bcf9ba7 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 11:10:45 -0800 Subject: [PATCH 08/13] fix: replace eval with json decoding (#1327) # What does this PR do? - Using `eval` on server is a security risk - Replace `eval` with `json.loads` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` pytest -v -s --nbval-lax ./llama-stack/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ``` image [//]: # (## Documentation) --- .../providers/inline/eval/meta_reference/eval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 48157b018..a01f7f1f3 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import json from typing import Any, Dict, List, Optional from tqdm import tqdm @@ -116,7 +117,7 @@ class MetaReferenceEvalImpl( generations = [] for i, x in tqdm(enumerate(input_rows)): assert ColumnName.chat_completion_input.value in x, "Invalid input row" - input_messages = eval(str(x[ColumnName.chat_completion_input.value])) + input_messages = json.loads(x[ColumnName.chat_completion_input.value]) input_messages = [UserMessage(**x) for x in input_messages] # NOTE: only single-turn agent generation is supported. Create a new session for each input row @@ -158,7 +159,7 @@ class MetaReferenceEvalImpl( generations = [] for x in tqdm(input_rows): if ColumnName.completion_input.value in x: - input_content = eval(str(x[ColumnName.completion_input.value])) + input_content = json.loads(x[ColumnName.completion_input.value]) response = await self.inference_api.completion( model=candidate.model, content=input_content, @@ -166,9 +167,8 @@ class MetaReferenceEvalImpl( ) generations.append({ColumnName.generated_answer.value: response.completion_message.content}) elif ColumnName.chat_completion_input.value in x: - chat_completion_input_str = str(x[ColumnName.chat_completion_input.value]) - input_messages = eval(chat_completion_input_str) - input_messages = [UserMessage(**x) for x in input_messages] + chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) + input_messages = [UserMessage(**x) for x in chat_completion_input_json] messages = [] if candidate.system_message: messages.append(candidate.system_message) From 5547ef953c304858d80b1ffa6b0f8226c3aad497 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 28 Feb 2025 11:16:12 -0800 Subject: [PATCH 09/13] feat: enhance OpenAPI spec to include Error types (#1320) # What does this PR do? An API spec must talk about Error handling. This was a pretty glaring omission so far. This PR begins to address it by adding a set of standard error responses we can attach to all our API calls. 
At a future point, we can add specific error types where necessary (although we should not hurry to do that; it is best done very late.) ## Test Plan Checked that Stainless SDK generation succeeds. --- docs/_static/llama-stack-spec.html | 1076 ++++++++++++++++- docs/_static/llama-stack-spec.yaml | 894 +++++++++++++- docs/openapi_generator/generate.py | 1 + docs/openapi_generator/pyopenapi/generator.py | 82 ++ docs/openapi_generator/pyopenapi/options.py | 2 + llama_stack/apis/datatypes.py | 20 + 6 files changed, 2073 insertions(+), 2 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2a9f4b6f7..6b98cad90 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -52,6 +52,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -97,6 +109,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -128,6 +152,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -159,6 +195,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": 
{ + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -183,6 +231,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -219,6 +279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -255,6 +327,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -286,6 +370,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -317,6 +413,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -362,6 +470,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" } }, "tags": [ @@ -410,6 +530,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -438,6 +570,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -462,6 +606,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -492,6 +648,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -532,6 +700,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -570,6 +750,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" } }, "tags": [ @@ -608,6 +800,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -648,6 +852,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -679,6 +895,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -719,6 +947,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -773,6 +1013,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -826,6 +1078,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -863,6 +1127,18 @@ } } } + 
}, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -884,6 +1160,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -921,6 +1209,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -942,6 +1242,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -979,6 +1291,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1016,6 +1340,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1046,6 +1382,18 @@ } } } + 
}, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1084,6 +1432,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1124,6 +1484,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1154,6 +1526,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1175,6 +1559,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1205,6 +1601,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1242,6 +1650,18 @@ } } } + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1279,6 +1699,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1309,6 +1741,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1337,6 +1781,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1373,6 +1829,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1422,6 +1890,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1443,6 +1923,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1473,6 +1965,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1487,6 +1991,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1511,6 +2027,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1542,6 +2070,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1580,6 +2120,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1609,6 +2161,18 @@ "responses": { "200": { 
"description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1647,6 +2211,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1685,6 +2261,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1697,6 +2285,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1728,6 +2328,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1740,6 +2352,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ 
-1771,6 +2395,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1802,6 +2438,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1821,6 +2469,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1852,6 +2512,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1873,6 +2545,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1894,6 +2578,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1932,6 +2628,18 @@ } } } + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1944,6 +2652,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1975,6 +2695,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1994,6 +2726,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2025,6 +2769,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2037,6 +2793,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2068,6 +2836,18 @@ } } } + }, + "400": { + 
"$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2098,6 +2878,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2117,6 +2909,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2141,6 +2945,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2172,6 +2988,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2203,6 +3031,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2234,6 +3074,18 @@ } } } + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2265,6 +3117,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2296,6 +3160,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2332,6 +3208,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2391,6 +3279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2431,6 +3331,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2455,6 +3367,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2486,6 +3410,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2517,6 +3453,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2548,6 +3496,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2579,6 +3539,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2610,6 +3582,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2623,6 +3607,35 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { + 
"Error": { + "type": "object", + "properties": { + "status": { + "type": "integer", + "description": "HTTP status code" + }, + "title": { + "type": "string", + "description": "Error title, a short summary of the error which is invariant for an error type" + }, + "detail": { + "type": "string", + "description": "Error detail, a longer human-readable description of the error" + }, + "instance": { + "type": "string", + "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error" + } + }, + "additionalProperties": false, + "required": [ + "status", + "title", + "detail" + ], + "title": "Error", + "description": "Error response from the API. Roughly follows RFC 7807." + }, "AppendRowsRequest": { "type": "object", "properties": { @@ -8741,7 +9754,68 @@ "title": "VersionInfo" } }, - "responses": {} + "responses": { + "BadRequest400": { + "description": "The request was invalid or malformed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed" + } + } + } + }, + "TooManyRequests429": { + "description": "The client has sent too many requests in a given amount of time", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later." + } + } + } + }, + "InternalServerError500": { + "description": "The server encountered an unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. Our team has been notified." 
+ } + } + } + }, + "DefaultError": { + "description": "An unexpected error occurred", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred" + } + } + } + } + } }, "security": [ { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a2329e47a..13f7edc4b 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -19,6 +19,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PaginatedRowsResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -47,6 +57,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -66,6 +86,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - BatchInference (Coming Soon) description: '' @@ -85,6 +115,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: 
- BatchInference (Coming Soon) description: '' @@ -100,6 +140,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -124,6 +174,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -149,6 +209,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/CompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -169,6 +239,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -188,6 +268,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentSessionCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -217,6 +307,16 @@ paths: 
text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -246,6 +346,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBucketResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all buckets. @@ -263,6 +373,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileUploadResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -279,6 +399,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -297,6 +427,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Session' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -322,6 +462,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: 
>- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -345,6 +495,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -371,6 +531,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -401,6 +571,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EmbeddingsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -421,6 +601,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -445,6 +635,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentStepResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + 
#/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -478,6 +678,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Turn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -508,6 +718,16 @@ paths: oneOf: - $ref: '#/components/schemas/Benchmark' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -528,6 +748,16 @@ paths: oneOf: - $ref: '#/components/schemas/Dataset' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -541,6 +771,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -561,6 +801,16 @@ paths: oneOf: - $ref: '#/components/schemas/Model' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -574,6 +824,16 @@ paths: responses: '200': description: 
OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -594,6 +854,16 @@ paths: oneOf: - $ref: '#/components/schemas/ScoringFn' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -614,6 +884,16 @@ paths: oneOf: - $ref: '#/components/schemas/Shield' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -632,6 +912,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Span' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -655,6 +945,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpanTreeResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -679,6 +979,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Tool' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- 
+ #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -697,6 +1007,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolGroup' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -710,6 +1030,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Unregister a tool group @@ -728,6 +1058,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Trace' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -748,6 +1088,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -768,6 +1118,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: 
'#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -786,6 +1146,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -801,6 +1171,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileUploadResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -822,6 +1202,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -852,6 +1242,16 @@ paths: oneOf: - $ref: '#/components/schemas/VectorDB' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -865,6 +1265,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -883,6 
+1293,16 @@ paths: application/json: schema: $ref: '#/components/schemas/HealthInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -892,6 +1312,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -908,6 +1338,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -927,6 +1367,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolInvocationResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: Run a tool with the given arguments @@ -948,6 +1398,16 @@ paths: oneOf: - $ref: '#/components/schemas/JobStatus' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -966,6 +1426,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + 
#/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -989,6 +1459,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1012,6 +1492,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1020,6 +1510,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1039,6 +1539,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListDatasetsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -1047,6 +1557,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' 
tags: - Datasets description: '' @@ -1066,6 +1586,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListFileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all files in a bucket. @@ -1085,6 +1615,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1097,6 +1637,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1116,6 +1666,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1129,6 +1689,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1142,6 +1712,16 @@ paths: 
application/jsonl: schema: $ref: '#/components/schemas/ToolDef' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: '' @@ -1165,6 +1745,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1173,6 +1763,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1192,6 +1792,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListShieldsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1204,6 +1814,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1223,6 +1843,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + $ref: 
'#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tool groups with optional provider @@ -1231,6 +1861,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Register a tool group @@ -1250,6 +1890,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tools with optional tool group @@ -1268,6 +1918,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListVectorDBsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1280,6 +1940,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VectorDB' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1295,6 +1965,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + 
#/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1314,6 +1994,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1333,6 +2023,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RAGQueryResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -1353,6 +2053,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -1372,6 +2082,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpansResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1391,6 +2111,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryTracesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + 
#/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1415,6 +2145,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: >- @@ -1457,6 +2197,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1481,6 +2231,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RunShieldResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Safety description: '' @@ -1496,6 +2256,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1515,6 +2285,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1534,6 
+2314,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1553,6 +2343,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1572,6 +2372,16 @@ paths: application/json: schema: $ref: '#/components/schemas/SyntheticDataGenerationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - SyntheticDataGeneration (Coming Soon) description: '' @@ -1591,6 +2401,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VersionInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1599,6 +2419,34 @@ jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: schemas: + Error: + type: object + properties: + status: + type: integer + description: HTTP status code + title: + type: string + description: >- + Error title, a short summary of the error which is invariant for an error + type + detail: + type: string + description: >- + Error detail, a longer human-readable description of the 
error + instance: + type: string + description: >- + (Optional) A URL which can be used to retrieve more information about + the specific occurrence of the error + additionalProperties: false + required: + - status + - title + - detail + title: Error + description: >- + Error response from the API. Roughly follows RFC 7807. AppendRowsRequest: type: object properties: @@ -5626,7 +6474,51 @@ components: required: - version title: VersionInfo - responses: {} + responses: + BadRequest400: + description: The request was invalid or malformed + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 400 + title: Bad Request + detail: The request was invalid or malformed + TooManyRequests429: + description: >- + The client has sent too many requests in a given amount of time + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 429 + title: Too Many Requests + detail: >- + You have exceeded the rate limit. Please try again later. + InternalServerError500: + description: >- + The server encountered an unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 500 + title: Internal Server Error + detail: >- + An unexpected error occurred. Our team has been notified. 
+ DefaultError: + description: An unexpected error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 0 + title: Error + detail: An unexpected error occurred security: - Default: [] tags: diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index dcbee7d2f..a2553f905 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -55,6 +55,7 @@ def main(output_dir: str): a set of endpoints and their corresponding interfaces that are tailored to best leverage Llama Models.""", ), + include_standard_error_responses=True, ), ) diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 4220cfc05..91f32e6c8 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -10,6 +10,7 @@ import typing from dataclasses import make_dataclass from typing import Any, Dict, Set, Union +from llama_stack.apis.datatypes import Error from llama_stack.strong_typing.core import JsonType from llama_stack.strong_typing.docstring import Docstring, parse_type from llama_stack.strong_typing.inspection import ( @@ -434,6 +435,75 @@ class Generator: ) self.schema_builder = SchemaBuilder(schema_generator) self.responses = {} + + # Create standard error responses + self._create_standard_error_responses() + + def _create_standard_error_responses(self) -> None: + """ + Creates standard error responses that can be reused across operations. + These will be added to the components.responses section of the OpenAPI document. 
+ """ + # Get the Error schema + error_schema = self.schema_builder.classdef_to_ref(Error) + + # Create standard error responses + self.responses["BadRequest400"] = Response( + description="The request was invalid or malformed", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed", + } + ) + } + ) + + self.responses["TooManyRequests429"] = Response( + description="The client has sent too many requests in a given amount of time", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later.", + } + ) + } + ) + + self.responses["InternalServerError500"] = Response( + description="The server encountered an unexpected error", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. 
Our team has been notified.", + } + ) + } + ) + + # Add a default error response for any unhandled error cases + self.responses["DefaultError"] = Response( + description="An unexpected error occurred", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred", + } + ) + } + ) def _build_type_tag(self, ref: str, schema: Schema) -> Tag: # Don't include schema definition in the tag description because for one, @@ -649,6 +719,18 @@ class Generator: responses.update(response_builder.build_response(response_options)) assert len(responses.keys()) > 0, f"No responses found for {op.name}" + + # Add standard error response references + if self.options.include_standard_error_responses: + if "400" not in responses: + responses["400"] = ResponseRef("BadRequest400") + if "429" not in responses: + responses["429"] = ResponseRef("TooManyRequests429") + if "500" not in responses: + responses["500"] = ResponseRef("InternalServerError500") + if "default" not in responses: + responses["default"] = ResponseRef("DefaultError") + if op.event_type is not None: builder = ContentBuilder(self.schema_builder) callbacks = { diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py index f80da453b..edc861ad5 100644 --- a/docs/openapi_generator/pyopenapi/options.py +++ b/docs/openapi_generator/pyopenapi/options.py @@ -35,6 +35,7 @@ class Options: :param error_wrapper: True if errors are encapsulated in an error object wrapper. :param property_description_fun: Custom transformation function to apply to class property documentation strings. :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types. + :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, and a default fallback) in all operations.
""" server: Server @@ -52,6 +53,7 @@ class Options: error_wrapper: bool = False property_description_fun: Optional[Callable[[type, str, str], str]] = None captions: Optional[Dict[str, str]] = None + include_standard_error_responses: bool = True default_captions: ClassVar[Dict[str, str]] = { "Operations": "Operations", diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 6df93052c..842a2b63d 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -5,6 +5,9 @@ # the root directory of this source tree. from enum import Enum +from typing import Optional + +from pydantic import BaseModel from llama_stack.schema_utils import json_schema_type @@ -33,3 +36,20 @@ class Api(Enum): # built-in API inspect = "inspect" + + +@json_schema_type +class Error(BaseModel): + """ + Error response from the API. Roughly follows RFC 7807. + + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: Optional[str] = None From 15f69e75ffaf07c79edf1cdcef1c31d0b67bbc3d Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 11:25:23 -0800 Subject: [PATCH 10/13] fix: replace eval with json decoding for format_adapter (#1328) # What does this PR do? - using `eval` is a security risk [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - see https://github.com/meta-llama/llama-stack/pull/1327 cc @SLR722 we will need to update the corresponding dataset via ```python def update_to_json_str(): dataset = datasets.load_dataset(...) 
processed_dataset = dataset[split].map( lambda x: { "column": json.dumps(eval(x["column"])) } ) processed_dataset.push_to_hub(...) ``` [//]: # (## Documentation) --- .../post_training/torchtune/datasets/format_adapter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py index 884977803..6b607f1c7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py @@ -10,16 +10,19 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import json from typing import Any, Mapping from llama_stack.providers.utils.common.data_schema_validator import ColumnName -def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]: +def llama_stack_instruct_to_torchtune_instruct( + sample: Mapping[str, Any], +) -> Mapping[str, Any]: assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, ( "Invalid input row" ) - input_messages = eval(str(sample[ColumnName.chat_completion_input.value])) + input_messages = json.loads(sample[ColumnName.chat_completion_input.value]) assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message" input_message = input_messages[0] @@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]: assert ColumnName.dialog.value in sample, "Invalid input row" role_map = {"user": "human", "assistant": "gpt"} - dialog = eval(str(sample[ColumnName.dialog.value])) + dialog = json.loads(sample[ColumnName.dialog.value]) assert len(dialog) > 1, "dialog must 
have at least 2 messagse" roles = [] From 82fa0803faee41ae0e74a5e97066cdb78bfee294 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 12:29:50 -0800 Subject: [PATCH 11/13] chore: refactor client tool in test (#1331) # What does this PR do? Use @client_tool decorator instead of ClientTool [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/client-sdk/agents/test_agents.py --inference-model "meta-llama/Llama-3.3-70B-Instruct" ``` image [//]: # (## Documentation) --- tests/client-sdk/agents/test_agents.py | 82 ++++++-------------------- 1 file changed, 18 insertions(+), 64 deletions(-) diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 8f68699b2..9690a8139 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -4,20 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json -from typing import Dict, List from uuid import uuid4 import pytest from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.client_tool import ClientTool +from llama_stack_client.lib.agents.client_tool import client_tool from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import ToolResponseMessage from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument from llama_stack_client.types.memory_insert_params import Document -from llama_stack_client.types.shared.completion_message import CompletionMessage from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig -from llama_stack_client.types.tool_def_param import Parameter from llama_stack.apis.agents.agents import ( AgentConfig as Server__AgentConfig, @@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import ( ) -class TestClientTool(ClientTool): - """Tool to give boiling point of a liquid - Returns the correct value for polyjuice in Celcius and Fahrenheit - and returns -1 for other liquids +@client_tool +def get_boiling_point(liquid_name: str, celcius: bool = True) -> int: """ + Returns the boiling point of a liquid in Celcius or Fahrenheit - def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: - assert len(messages) == 1, "Expected single message" - - message = messages[0] - - tool_call = message.tool_calls[0] - - try: - response = self.run_impl(**tool_call.arguments) - response_str = json.dumps(response, ensure_ascii=False) - except Exception as e: - response_str = f"Error when running tool: {e}" - - message = ToolResponseMessage( - role="tool", - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=response_str, - ) - return message - - def get_name(self) -> str: - return "get_boiling_point" - - def get_description(self) -> str: - return "Get the boiling point of imaginary liquids (eg. 
polyjuice)" - - def get_params_definition(self) -> Dict[str, Parameter]: - return { - "liquid_name": Parameter( - name="liquid_name", - parameter_type="string", - description="The name of the liquid", - required=True, - ), - "celcius": Parameter( - name="celcius", - parameter_type="boolean", - description="Whether to return the boiling point in Celcius", - required=False, - ), - } - - def run_impl(self, liquid_name: str, celcius: bool = True) -> int: - if liquid_name.lower() == "polyjuice": - if celcius: - return -100 - else: - return -212 + :param liquid_name: The name of the liquid + :param celcius: Whether to return the boiling point in Celcius + :return: The boiling point of the liquid in Celcius or Fahrenheit + """ + if liquid_name.lower() == "polyjuice": + if celcius: + return -100 else: - return -1 + return -212 + else: + return -1 @pytest.fixture(scope="session") @@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): def test_custom_tool(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "toolgroups": ["builtin::websearch"], @@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config): def test_tool_choice(llama_stack_client, agent_config): def run_agent(tool_choice): - client_tool = TestClientTool() + client_tool = get_boiling_point test_agent_config = { **agent_config, @@ -362,7 +316,7 @@ def test_tool_choice(llama_stack_client, agent_config): # TODO: fix this flaky test def xtest_override_system_message_behavior(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "instructions": "You are a pirate", @@ -586,7 +540,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): def test_create_turn_response(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { 
**agent_config, "input_shields": [], From 9b6a2577b1ced370b314d32a8b97093c0f6b4c7e Mon Sep 17 00:00:00 2001 From: Surya Prakash Pathak Date: Fri, 28 Feb 2025 21:37:03 +0000 Subject: [PATCH 12/13] docs: Update llama-stack version in README.md (#1330) # What does this PR do? This PR updates the version in the [README.md](https://github.com/meta-llama/llama-stack/blob/main/docs/zero_to_hero_guide/README.md) to reflect the latest changes in Llama Stack setup. Previously, using **llama-stack==0.1.0** caused an error when running: ```bash llama stack build --template ollama --image-type conda ``` Upgrading to llama-stack==0.1.3 resolves this issue. ## Test Plan - Verified that `llama stack build --template ollama --image-type conda` works correctly. --------- Signed-off-by: Surya Prakash Pathak --- docs/zero_to_hero_guide/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 7dfd8e5fa..98f40bc3c 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -73,7 +73,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next Open a new terminal and install `llama-stack`: ```bash conda activate ollama - pip install llama-stack==0.1.0 + pip install -U llama-stack ``` --- From 7ad7e3b970a2ea143a3e4fc4e5befed1d6bc7d67 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 28 Feb 2025 16:12:05 -0800 Subject: [PATCH 13/13] fix: only install llama-stack package, deps are now correctly incorporated --- llama_stack/distribution/build_container.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 68f8a0863..04d115f70 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -159,12 +159,12 @@ EOF add_to_container << EOF RUN uv pip install --no-cache 
--extra-index-url https://test.pypi.org/simple/ \ --index-strategy unsafe-best-match \ - llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION + llama-stack==$TEST_PYPI_VERSION EOF else if [ -n "$PYPI_VERSION" ]; then - SPEC_VERSION="llama-stack==${PYPI_VERSION} llama-models==${PYPI_VERSION} llama-stack-client==${PYPI_VERSION}" + SPEC_VERSION="llama-stack==${PYPI_VERSION}" else SPEC_VERSION="llama-stack" fi