From 354faa15cef47902fd51119b800e94681d444691 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 15 May 2025 10:50:56 -0400 Subject: [PATCH 1/9] feat: Allow to print usage information for install script (#2171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This allows users to print the usage information for this script: ``` 📚 Llama-Stack Deployment Script Description: This script sets up and deploys Llama-Stack with Ollama integration in containers. It handles both Docker and Podman runtimes and includes automatic platform detection. Usage: install.sh [OPTIONS] Options: -p, --port PORT Server port for Llama-Stack (default: 8321) -o, --ollama-port PORT Ollama service port (default: 11434) -m, --model MODEL Model alias to use (default: llama3.2:3b) -i, --image IMAGE Server image (default: llamastack/distribution-ollama:0.2.2) -t, --timeout SECONDS Service wait timeout in seconds (default: 300) -h, --help Show this help message For more information: Documentation: https://llama-stack.readthedocs.io/ GitHub: https://github.com/meta-llama/llama-stack Report issues: https://github.com/meta-llama/llama-stack/issues ``` --------- Signed-off-by: Yuan Tang Co-authored-by: Sébastien Han --- install.sh | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/install.sh b/install.sh index 614dbc2f2..e424925a6 100755 --- a/install.sh +++ b/install.sh @@ -38,6 +38,67 @@ wait_for_service() { return 0 } +usage() { + cat << EOF +📚 Llama-Stack Deployment Script + +Description: + This script sets up and deploys Llama-Stack with Ollama integration in containers. + It handles both Docker and Podman runtimes and includes automatic platform detection. + +Usage: + $(basename "$0") [OPTIONS] + +Options: + -p, --port PORT Server port for Llama-Stack (default: ${PORT}) + -o, --ollama-port PORT Ollama service port (default: ${OLLAMA_PORT}) + -m, --model MODEL Model alias to use (default: ${MODEL_ALIAS}) + -i, --image IMAGE Server image (default: ${SERVER_IMAGE}) + -t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT}) + -h, --help Show this help message + +For more information: + Documentation: https://llama-stack.readthedocs.io/ + GitHub: https://github.com/meta-llama/llama-stack + +Report issues: + https://github.com/meta-llama/llama-stack/issues +EOF +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + usage + exit 0 + ;; + -p|--port) + PORT="$2" + shift 2 + ;; + -o|--ollama-port) + OLLAMA_PORT="$2" + shift 2 + ;; + -m|--model) + MODEL_ALIAS="$2" + shift 2 + ;; + -i|--image) + SERVER_IMAGE="$2" + shift 2 + ;; + -t|--timeout) + WAIT_TIMEOUT="$2" + shift 2 + ;; + *) + die "Unknown option: $1" + ;; + esac +done + if command -v docker &> /dev/null; then ENGINE="docker" elif command -v podman &> /dev/null; then From c3f27de3eada24fb9890d80e1527a4e8b923ed32 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 15 May 2025 11:39:25 -0400 Subject: [PATCH 2/9] chore: Update triagers list with new additions (#2180) Signed-off-by: Ihar Hrachyshka --- .github/TRIAGERS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md index d4ef6d1ac..586a5a506 100644 --- a/.github/TRIAGERS.md +++ b/.github/TRIAGERS.md @@ -1,2 +1,2 @@ # This file documents Triage members in the Llama Stack community -@franciscojavierarceo @leseb + @bbrowning @booxter @franciscojavierarceo @leseb From 
7e25c8df28d238a27c077e499463b9d939e5f6d5 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 15 May 2025 11:41:15 -0400 Subject: [PATCH 3/9] fix: ReadTheDocs should display all versions (#2172) # What does this PR do? Currently the website only displays the "latest" version. This is because our config and workflow do not include version information. This PR adds the missing version info. --------- Signed-off-by: Yuan Tang --- .github/workflows/update-readthedocs.yml | 7 ++++++- docs/source/conf.py | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 21e3b633d..094942368 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -14,6 +14,8 @@ on: - 'docs/**' - 'pyproject.toml' - '.github/workflows/update-readthedocs.yml' + tags: + - '*' pull_request: branches: - main @@ -61,7 +63,10 @@ jobs: response=$(curl -X POST \ -H "Content-Type: application/json" \ - -d "{\"token\": \"$TOKEN\"}" \ + -d "{ + \"token\": \"$TOKEN\", + \"version\": \"$GITHUB_REF_NAME\" + }" \ https://readthedocs.org/api/v2/webhook/llama-stack/289768/) echo "Response: $response" diff --git a/docs/source/conf.py b/docs/source/conf.py index 55c6383b2..501a923dd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -110,6 +110,8 @@ html_theme_options = { "canonical_url": "https://github.com/meta-llama/llama-stack", "collapse_navigation": False, # "style_nav_header_background": "#c3c9d4", + 'display_version': True, + 'version_selector': True, } default_dark_mode = False From e46de23be6b03073b524ebc47f7145f4eb10541e Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Thu, 15 May 2025 14:17:03 -0400 Subject: [PATCH 4/9] feat: refactor external providers dir (#2049) # What does this PR do? Currently, the "default" dir for external providers is `/etc/llama-stack/providers.d`. This dir is not used anywhere, nor is it created. Switch to the friendlier `~/.llama/providers.d/`. This allows external providers to actually create this dir and/or populate it upon installation, since `pip` cannot create directories in `/etc`. 
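For example, a provider package can now create and populate the directory itself at install time. A minimal sketch of what that could look like, mirroring the CI workflow change below (the source path of the provider spec YAML is illustrative):

```
# Sketch: a provider package populating the new default dir at install time
# (the source YAML path is illustrative, not part of this PR)
mkdir -p ~/.llama/providers.d/remote/inference
cp llama-stack-provider-ollama/custom_ollama.yaml \
   ~/.llama/providers.d/remote/inference/custom_ollama.yaml
```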
If a user does not specify a dir, default to this one; see https://github.com/containers/ramalama-stack/issues/36. Signed-off-by: Charlie Doern --- .github/workflows/test-external-providers.yml | 6 +- docs/source/distributions/building_distro.md | 4 +- docs/source/providers/external.md | 6 +- llama_stack/cli/stack/_build.py | 13 ++- llama_stack/cli/stack/run.py | 91 +++++++++++-------- llama_stack/distribution/build_container.sh | 21 +++-- llama_stack/distribution/configure.py | 4 + llama_stack/distribution/datatypes.py | 14 ++- llama_stack/distribution/distribution.py | 2 +- llama_stack/distribution/start_stack.sh | 83 +++++++++++------ llama_stack/distribution/utils/config_dirs.py | 2 + llama_stack/distribution/utils/exec.py | 6 +- .../custom-distro.yaml | 2 +- .../llama-stack-provider-ollama/run.yaml | 2 +- 14 files changed, 166 insertions(+), 90 deletions(-) diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml index 77e280349..8c75dde25 100644 --- a/.github/workflows/test-external-providers.yml +++ b/.github/workflows/test-external-providers.yml @@ -47,8 +47,8 @@ jobs: - name: Create provider configuration run: | - mkdir -p /tmp/providers.d/remote/inference - cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml + mkdir -p /home/runner/.llama/providers.d/remote/inference + cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml - name: Build distro from config file run: | @@ -66,7 +66,7 @@ jobs: - name: Wait for Llama Stack server to be ready run: | for i in {1..30}; do - if ! grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then + if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then echo "Waiting for Llama Stack server to load the provider..." sleep 1 else diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 56b8d30a8..a5c223219 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -178,7 +178,7 @@ image_name: ollama image_type: conda # If some providers are external, you can specify the path to the implementation -external_providers_dir: /etc/llama-stack/providers.d +external_providers_dir: ~/.llama/providers.d ``` ``` @@ -206,7 +206,7 @@ distribution_spec: image_type: container image_name: ci-test # Path to external provider implementations -external_providers_dir: /etc/llama-stack/providers.d +external_providers_dir: ~/.llama/providers.d ``` Here's an example for a custom Ollama provider: diff --git a/docs/source/providers/external.md b/docs/source/providers/external.md index 6c36901ee..55211ac5f 100644 --- a/docs/source/providers/external.md +++ b/docs/source/providers/external.md @@ -10,7 +10,7 @@ Llama Stack supports external providers that live outside of the main codebase. To enable external providers, you need to configure the `external_providers_dir` in your Llama Stack configuration. This directory should contain your external provider specifications: ```yaml -external_providers_dir: /etc/llama-stack/providers.d/ +external_providers_dir: ~/.llama/providers.d/ ``` ## Directory Structure @@ -182,7 +182,7 @@ dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"] 3. 
Create the provider specification: ```yaml -# /etc/llama-stack/providers.d/remote/inference/custom_ollama.yaml +# ~/.llama/providers.d/remote/inference/custom_ollama.yaml adapter: adapter_type: custom_ollama pip_packages: ["ollama", "aiohttp"] @@ -201,7 +201,7 @@ uv pip install -e . 5. Configure Llama Stack to use external providers: ```yaml -external_providers_dir: /etc/llama-stack/providers.d/ +external_providers_dir: ~/.llama/providers.d/ ``` The provider will now be available in Llama Stack with the type `remote::custom_ollama`. diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index ae4a39ce2..37147e905 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -36,7 +36,8 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.resolver import InvalidProviderError -from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.distribution.stack import replace_env_vars +from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.exec import formulate_run_args, run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType @@ -202,7 +203,9 @@ def run_stack_build_command(args: argparse.Namespace) -> None: else: with open(args.config) as f: try: - build_config = BuildConfig(**yaml.safe_load(f)) + contents = yaml.safe_load(f) + contents = replace_env_vars(contents) + build_config = BuildConfig(**contents) except Exception as e: cprint( f"Could not parse config file {args.config}: {e}", @@ -248,6 +251,8 @@ def run_stack_build_command(args: argparse.Namespace) -> None: run_config = Path(run_config) config_dict = yaml.safe_load(run_config.read_text()) config = parse_and_maybe_upgrade_config(config_dict) + if not os.path.exists(str(config.external_providers_dir)): + os.makedirs(str(config.external_providers_dir), exist_ok=True) run_args = formulate_run_args(args.image_type, args.image_name, config, args.template) run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))]) run_command(run_args) @@ -267,7 +272,9 @@ def _generate_run_config( image_name=image_name, apis=apis, providers={}, - external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else None, + external_providers_dir=build_config.external_providers_dir + if build_config.external_providers_dir + else EXTERNAL_PROVIDERS_DIR, ) # build providers dict provider_registry = get_provider_registry(build_config) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index f3a6a9865..4a44e0366 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -33,7 +33,8 @@ class StackRun(Subcommand): self.parser.add_argument( "config", type=str, - help="Path to config file to use for the run", + nargs="?", # Make it optional + help="Path to config file to use for the run. 
Required for venv and conda environments.", ) self.parser.add_argument( "--port", @@ -82,44 +83,55 @@ class StackRun(Subcommand): from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.exec import formulate_run_args, run_command - config_file = Path(args.config) - has_yaml_suffix = args.config.endswith(".yaml") - template_name = None - - if not config_file.exists() and not has_yaml_suffix: - # check if this is a template - config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml" - if config_file.exists(): - template_name = args.config - - if not config_file.exists() and not has_yaml_suffix: - # check if it's a build config saved to ~/.llama dir - config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml") - - if not config_file.exists(): - self.parser.error( - f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file" - ) - - if not config_file.is_file(): - self.parser.error( - f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}" - ) - - logger.info(f"Using run configuration: {config_file}") - - try: - config_dict = yaml.safe_load(config_file.read_text()) - except yaml.parser.ParserError as e: - self.parser.error(f"failed to load config file '{config_file}':\n {e}") - - try: - config = parse_and_maybe_upgrade_config(config_dict) - except AttributeError as e: - self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - image_type, image_name = self._get_image_type_and_name(args) + # Check if config is required based on image type + if (image_type in [ImageType.CONDA.value, ImageType.VENV.value]) and not args.config: + self.parser.error("Config file is required for venv and conda environments") + + if args.config: + config_file = Path(args.config) + has_yaml_suffix = args.config.endswith(".yaml") + template_name = None + + if not config_file.exists() and not has_yaml_suffix: + # check if this is a template + config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml" + if config_file.exists(): + template_name = args.config + + if not config_file.exists() and not has_yaml_suffix: + # check if it's a build config saved to ~/.llama dir + config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml") + + if not config_file.exists(): + self.parser.error( + f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file" + ) + + if not config_file.is_file(): + self.parser.error( + f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}" + ) + + logger.info(f"Using run configuration: {config_file}") + + try: + config_dict = yaml.safe_load(config_file.read_text()) + except yaml.parser.ParserError as e: + self.parser.error(f"failed to load config file '{config_file}':\n {e}") + + try: + config = parse_and_maybe_upgrade_config(config_dict) + if not os.path.exists(str(config.external_providers_dir)): + os.makedirs(str(config.external_providers_dir), exist_ok=True) + except AttributeError as e: + self.parser.error(f"failed to parse config file '{config_file}':\n {e}") + else: + config = None + config_file = None + template_name = None + # If neither image type nor image name is provided, assume the server should be run directly # using the current environment packages. 
if not image_type and not image_name: @@ -141,7 +153,10 @@ class StackRun(Subcommand): else: run_args = formulate_run_args(image_type, image_name, config, template_name) - run_args.extend([str(config_file), str(args.port)]) + run_args.extend([str(args.port)]) + + if config_file: + run_args.extend(["--config", str(config_file)]) if args.env: for env_var in args.env: diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index ad316d45e..c128729e1 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -154,6 +154,12 @@ get_python_cmd() { fi } +# Add other required item commands generic to all containers +add_to_container << EOF +# Allows running as non-root user +RUN mkdir -p /.llama/providers.d /.cache +EOF + if [ -n "$run_config" ]; then # Copy the run config to the build context since it's an absolute path cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" @@ -166,17 +172,19 @@ EOF # and update the configuration to reference the new container path python_cmd=$(get_python_cmd) external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") - if [ -n "$external_providers_dir" ]; then + external_providers_dir=$(eval echo "$external_providers_dir") + if [ -n "$external_providers_dir" ] && [ -d "$external_providers_dir" ]; then echo "Copying external providers directory: $external_providers_dir" + cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" add_to_container << EOF -COPY $external_providers_dir /app/providers.d +COPY providers.d /.llama/providers.d EOF - # Edit the run.yaml file to change the external_providers_dir to /app/providers.d + # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d if [ "$(uname)" = "Darwin" ]; then - sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" else - sed -i 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" fi fi fi @@ -255,9 +263,6 @@ fi # Add other require item commands genearic to all containers add_to_container << EOF -# Allows running as non-root user -RUN mkdir -p /.llama /.cache - RUN chmod -R g+rw /app /.llama /.cache EOF diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index 78a6a184e..e58ea0338 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) +from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.prompt_for_config import prompt_for_config from llama_stack.providers.datatypes import Api, ProviderSpec @@ -170,4 +171,7 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION + if not config_dict.get("external_providers_dir", None): + config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR + return 
StackRunConfig(**config_dict) diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index d36e21c6d..7b5465e0a 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -5,9 +5,10 @@ # the root directory of this source tree. from enum import Enum +from pathlib import Path from typing import Annotated, Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput from llama_stack.apis.datasetio import DatasetIO @@ -312,11 +313,20 @@ a default SQLite store will be used.""", description="Configuration for the HTTP(S) server", ) - external_providers_dir: str | None = Field( + external_providers_dir: Path | None = Field( default=None, description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", ) + @field_validator("external_providers_dir") + @classmethod + def validate_external_providers_dir(cls, v): + if v is None: + return None + if isinstance(v, str): + return Path(v) + return v + class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 07a91478a..b860d15ab 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -145,7 +145,7 @@ def get_provider_registry( # Check if config has the external_providers_dir attribute if config and hasattr(config, "external_providers_dir") and config.external_providers_dir: - external_providers_dir = os.path.abspath(config.external_providers_dir) + external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir)) if not os.path.exists(external_providers_dir): raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}") logger.info(f"Loading external providers from {external_providers_dir}") diff --git a/llama_stack/distribution/start_stack.sh b/llama_stack/distribution/start_stack.sh index d3e13c7dc..bf49e1619 100755 --- a/llama_stack/distribution/start_stack.sh +++ b/llama_stack/distribution/start_stack.sh @@ -29,7 +29,7 @@ error_handler() { trap 'error_handler ${LINENO}' ERR if [ $# -lt 3 ]; then - echo "Usage: $0 " + echo "Usage: $0 [--config ] [--env KEY=VALUE]..." 
exit 1 fi @@ -40,37 +40,51 @@ env_path_or_name="$1" container_image="localhost/$env_path_or_name" shift -yaml_config="$1" -shift - port="$1" shift SCRIPT_DIR=$(dirname "$(readlink -f "$0")") source "$SCRIPT_DIR/common.sh" -# Initialize env_vars as an string +# Initialize variables +yaml_config="" env_vars="" other_args="" -# Process environment variables from --env arguments + +# Process remaining arguments while [[ $# -gt 0 ]]; do case "$1" in - --env) - - if [[ -n "$2" ]]; then - env_vars="$env_vars --env $2" - shift 2 - else - echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 - exit 1 - fi - ;; - *) - other_args="$other_args $1" - shift - ;; + --config|--yaml-config) + if [[ -n "$2" ]]; then + yaml_config="$2" + shift 2 + else + echo -e "${RED}Error: $1 requires a CONFIG argument${NC}" >&2 + exit 1 + fi + ;; + --env) + if [[ -n "$2" ]]; then + env_vars="$env_vars --env $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + other_args="$other_args $1" + shift + ;; esac done + +# Check if yaml_config is required based on env_type +if [[ "$env_type" == "venv" || "$env_type" == "conda" ]] && [ -z "$yaml_config" ]; then + echo -e "${RED}Error: --config is required for venv and conda environments${NC}" >&2 + exit 1 +fi + PYTHON_BINARY="python" case "$env_type" in "venv") @@ -106,8 +120,14 @@ esac if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then set -x + if [ -n "$yaml_config" ]; then + yaml_config_arg="--yaml-config $yaml_config" + else + yaml_config_arg="" + fi + $PYTHON_BINARY -m llama_stack.distribution.server.server \ - --yaml-config "$yaml_config" \ + $yaml_config_arg \ --port "$port" \ $env_vars \ $other_args @@ -149,15 +169,26 @@ elif [[ "$env_type" == "container" ]]; then version_tag=$(curl -s $URL | jq -r '.info.version') fi - $CONTAINER_BINARY run $CONTAINER_OPTS -it \ + # Build the command with optional yaml config + cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \ -p $port:$port \ $env_vars \ - -v "$yaml_config:/app/config.yaml" \ $mounts \ --env LLAMA_STACK_PORT=$port \ --entrypoint python \ $container_image:$version_tag \ - -m llama_stack.distribution.server.server \ - --yaml-config /app/config.yaml \ - $other_args + -m llama_stack.distribution.server.server" + + # Add yaml config if provided, otherwise use default + if [ -n "$yaml_config" ]; then + cmd="$cmd -v $yaml_config:/app/run.yaml --yaml-config /app/run.yaml" + else + cmd="$cmd --yaml-config /app/run.yaml" + fi + + # Add any other args + cmd="$cmd $other_args" + + # Execute the command + eval $cmd fi diff --git a/llama_stack/distribution/utils/config_dirs.py b/llama_stack/distribution/utils/config_dirs.py index 9b9a7ceb3..c3e520f28 100644 --- a/llama_stack/distribution/utils/config_dirs.py +++ b/llama_stack/distribution/utils/config_dirs.py @@ -14,3 +14,5 @@ DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions" DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints" RUNTIME_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "runtime" + +EXTERNAL_PROVIDERS_DIR = LLAMA_STACK_CONFIG_DIR / "providers.d" diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 3bf3c81ce..4acce4f5b 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -22,8 +22,10 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType def formulate_run_args(image_type, image_name, config, template_name) -> list: env_name = "" - if image_type == 
LlamaStackImageType.CONTAINER.value or config.container_image: - env_name = f"distribution-{template_name}" if template_name else config.container_image + if image_type == LlamaStackImageType.CONTAINER.value: + env_name = ( + f"distribution-{template_name}" if template_name else (config.container_image if config else image_name) + ) elif image_type == LlamaStackImageType.CONDA.value: current_conda_env = os.environ.get("CONDA_DEFAULT_ENV") env_name = image_name or current_conda_env diff --git a/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml b/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml index eb3b85e52..1f3ab3817 100644 --- a/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml @@ -6,4 +6,4 @@ distribution_spec: - remote::custom_ollama image_type: container image_name: ci-test -external_providers_dir: /tmp/providers.d +external_providers_dir: ~/.llama/providers.d diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 666189f03..158f6800f 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -91,4 +91,4 @@ tool_groups: provider_id: wolfram-alpha server: port: 8321 -external_providers_dir: /tmp/providers.d +external_providers_dir: ~/.llama/providers.d From bb5fca952158641d5f9d2c85bb3eb424eeab6898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 15 May 2025 20:22:51 +0200 Subject: [PATCH 5/9] chore: more API validators (#2165) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
We added: * make sure docstrings are present with 'params' and 'returns' * fail if someone sets 'returns: None' * fix the failing APIs Signed-off-by: Sébastien Han --- docs/_static/llama-stack-spec.html | 647 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 536 ++++++++++----- docs/openapi_generator/pyopenapi/utility.py | 39 ++ llama_stack/apis/agents/agents.py | 4 +- .../apis/batch_inference/batch_inference.py | 27 +- llama_stack/apis/benchmarks/benchmarks.py | 27 +- llama_stack/apis/datasetio/datasetio.py | 13 +- llama_stack/apis/datasets/datasets.py | 28 +- llama_stack/apis/eval/eval.py | 8 +- llama_stack/apis/files/files.py | 32 +- llama_stack/apis/inference/inference.py | 116 ++-- llama_stack/apis/inspect/inspect.py | 21 +- llama_stack/apis/models/models.py | 41 +- .../apis/post_training/post_training.py | 57 +- llama_stack/apis/providers/providers.py | 15 +- llama_stack/apis/safety/safety.py | 10 +- llama_stack/apis/scoring/scoring.py | 12 +- .../scoring_functions/scoring_functions.py | 27 +- llama_stack/apis/shields/shields.py | 26 +- llama_stack/apis/telemetry/telemetry.py | 83 ++- llama_stack/apis/tools/tools.py | 56 +- llama_stack/apis/vector_dbs/vector_dbs.py | 34 +- llama_stack/apis/vector_io/vector_io.py | 19 +- 23 files changed, 1304 insertions(+), 574 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 5df6db20c..9032e5968 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -62,11 +62,12 @@ "tags": [ "DatasetIO" ], - "description": "", + "description": "Append rows to a dataset.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to append the rows to.", "required": true, "schema": { "type": "string" @@ -89,7 +90,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A BatchChatCompletionResponse with the full completions.", "content": { "application/json": { "schema": { @@ -114,7 +115,7 @@ "tags": [ "Inference" ], - "description": "", + "description": "Generate chat completions for a batch of messages using the specified model.", "parameters": [], "requestBody": { "content": { @@ -132,7 +133,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A BatchCompletionResponse with the full completions.", "content": { "application/json": { "schema": { @@ -157,7 +158,7 @@ "tags": [ "Inference" ], - "description": "", + "description": "Generate completions for a batch of content using the specified model.", "parameters": [], "requestBody": { "content": { @@ -193,7 +194,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Cancel a training job.", "parameters": [], "requestBody": { "content": { @@ -211,7 +212,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk", + "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -259,7 +260,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk", + "description": "If stream=False, returns a CompletionResponse with the full completion. 
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -453,7 +454,7 @@ "post": { "responses": { "200": { - "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk", + "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.", "content": { "application/json": { "schema": { @@ -520,7 +521,7 @@ "post": { "responses": { "200": { - "description": "Runtime representation of an annotated type.", + "description": "An OpenAIResponseObject.", "content": { "application/json": { "schema": { @@ -568,7 +569,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListBucketResponse.", "content": { "application/json": { "schema": { @@ -598,6 +599,7 @@ { "name": "bucket", "in": "query", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -608,7 +610,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A FileUploadResponse.", "content": { "application/json": { "schema": { @@ -728,7 +730,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Session.", "content": { "application/json": { "schema": { @@ -835,7 +837,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A FileResponse.", "content": { "application/json": { "schema": { @@ -865,7 +867,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -874,7 +876,7 @@ { "name": "key", "in": "path", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).", "required": true, "schema": { "type": "string" @@ -908,7 +910,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -917,7 +919,7 @@ { "name": "key", "in": "path", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).", "required": true, "schema": { "type": "string" @@ -930,7 +932,7 @@ "post": { "responses": { "200": { - "description": "An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}", + "description": "An array of embeddings, one for each content. Each embedding is a list of floats. 
The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}.", "content": { "application/json": { "schema": { @@ -973,7 +975,7 @@ "post": { "responses": { "200": { - "description": "EvaluateResponse object containing generations and scores", + "description": "EvaluateResponse object containing generations and scores.", "content": { "application/json": { "schema": { @@ -1157,7 +1159,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Benchmark.", "content": { "application/json": { "schema": { @@ -1182,11 +1184,12 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "Get a benchmark by its ID.", "parameters": [ { "name": "benchmark_id", "in": "path", + "description": "The ID of the benchmark to get.", "required": true, "schema": { "type": "string" @@ -1199,7 +1202,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Dataset.", "content": { "application/json": { "schema": { @@ -1224,11 +1227,12 @@ "tags": [ "Datasets" ], - "description": "", + "description": "Get a dataset by its ID.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to get.", "required": true, "schema": { "type": "string" @@ -1257,11 +1261,12 @@ "tags": [ "Datasets" ], - "description": "", + "description": "Unregister a dataset by its ID.", "parameters": [ { "name": "dataset_id", "in": "path", + "description": "The ID of the dataset to unregister.", "required": true, "schema": { "type": "string" @@ -1274,7 +1279,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Model.", "content": { "application/json": { "schema": { @@ -1299,11 +1304,12 @@ "tags": [ "Models" ], - "description": "", + "description": "Get a model by its identifier.", "parameters": [ { "name": "model_id", "in": "path", + "description": "The identifier of the model to get.", "required": true, "schema": { "type": "string" @@ -1332,11 +1338,12 @@ "tags": [ "Models" ], - "description": "", + "description": "Unregister a model.", "parameters": [ { "name": "model_id", "in": "path", + "description": "The identifier of the model to unregister.", "required": true, "schema": { "type": "string" @@ -1392,7 +1399,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ScoringFn.", "content": { "application/json": { "schema": { @@ -1417,11 +1424,12 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "Get a scoring function by its ID.", "parameters": [ { "name": "scoring_fn_id", "in": "path", + "description": "The ID of the scoring function to get.", "required": true, "schema": { "type": "string" @@ -1434,7 +1442,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Shield.", "content": { "application/json": { "schema": { @@ -1459,11 +1467,12 @@ "tags": [ "Shields" ], - "description": "", + "description": "Get a shield by its identifier.", "parameters": [ { "name": "identifier", "in": "path", + "description": "The identifier of the shield to get.", "required": true, "schema": { "type": "string" @@ -1476,7 +1485,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Span.", "content": { "application/json": { "schema": { @@ -1501,11 +1510,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a span by its ID.", "parameters": [ { "name": "trace_id", "in": "path", + "description": "The ID of the trace to get the span from.", "required": true, "schema": { "type": "string" 
@@ -1514,6 +1524,7 @@ { "name": "span_id", "in": "path", + "description": "The ID of the span to get.", "required": true, "schema": { "type": "string" @@ -1526,7 +1537,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QuerySpanTreeResponse.", "content": { "application/json": { "schema": { @@ -1551,11 +1562,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a span tree by its ID.", "parameters": [ { "name": "span_id", "in": "path", + "description": "The ID of the span to get the tree from.", "required": true, "schema": { "type": "string" @@ -1578,7 +1590,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Tool.", "content": { "application/json": { "schema": { @@ -1603,11 +1615,12 @@ "tags": [ "ToolGroups" ], - "description": "", + "description": "Get a tool by its name.", "parameters": [ { "name": "tool_name", "in": "path", + "description": "The name of the tool to get.", "required": true, "schema": { "type": "string" @@ -1620,7 +1633,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ToolGroup.", "content": { "application/json": { "schema": { @@ -1645,11 +1658,12 @@ "tags": [ "ToolGroups" ], - "description": "", + "description": "Get a tool group by its ID.", "parameters": [ { "name": "toolgroup_id", "in": "path", + "description": "The ID of the tool group to get.", "required": true, "schema": { "type": "string" @@ -1678,11 +1692,12 @@ "tags": [ "ToolGroups" ], - "description": "Unregister a tool group", + "description": "Unregister a tool group.", "parameters": [ { "name": "toolgroup_id", "in": "path", + "description": "The ID of the tool group to unregister.", "required": true, "schema": { "type": "string" @@ -1695,7 +1710,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A Trace.", "content": { "application/json": { "schema": { @@ -1720,11 +1735,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Get a trace by its ID.", "parameters": [ { "name": "trace_id", "in": "path", + "description": "The ID of the trace to get.", "required": true, "schema": { "type": "string" @@ -1737,7 +1753,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJobArtifactsResponse.", "content": { "application/json": { "schema": { @@ -1762,11 +1778,12 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get the artifacts of a training job.", "parameters": [ { "name": "job_uuid", "in": "query", + "description": "The UUID of the job to get the artifacts of.", "required": true, "schema": { "type": "string" @@ -1779,7 +1796,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJobStatusResponse.", "content": { "application/json": { "schema": { @@ -1804,11 +1821,12 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get the status of a training job.", "parameters": [ { "name": "job_uuid", "in": "query", + "description": "The UUID of the job to get the status of.", "required": true, "schema": { "type": "string" @@ -1821,7 +1839,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListPostTrainingJobsResponse.", "content": { "application/json": { "schema": { @@ -1846,7 +1864,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Get all training jobs.", "parameters": [] } }, @@ -1854,7 +1872,7 @@ "get": { "responses": { "200": { - "description": "OK", + 
"description": "A FileUploadResponse.", "content": { "application/json": { "schema": { @@ -1879,12 +1897,12 @@ "tags": [ "Files" ], - "description": "Returns information about an existsing upload session", + "description": "Returns information about an existsing upload session.", "parameters": [ { "name": "upload_id", "in": "path", - "description": "ID of the upload session", + "description": "ID of the upload session.", "required": true, "schema": { "type": "string" @@ -1895,7 +1913,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A FileResponse or None if the upload is not complete.", "content": { "application/json": { "schema": { @@ -1932,7 +1950,7 @@ { "name": "upload_id", "in": "path", - "description": "ID of the upload session", + "description": "ID of the upload session.", "required": true, "schema": { "type": "string" @@ -1956,7 +1974,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A VectorDB.", "content": { "application/json": { "schema": { @@ -1981,11 +1999,12 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Get a vector database by its identifier.", "parameters": [ { "name": "vector_db_id", "in": "path", + "description": "The identifier of the vector database to get.", "required": true, "schema": { "type": "string" @@ -2014,11 +2033,12 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Unregister a vector database.", "parameters": [ { "name": "vector_db_id", "in": "path", + "description": "The identifier of the vector database to unregister.", "required": true, "schema": { "type": "string" @@ -2031,7 +2051,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A HealthInfo.", "content": { "application/json": { "schema": { @@ -2056,7 +2076,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "Get the health of the service.", "parameters": [] } }, @@ -2118,7 +2138,7 @@ "tags": [ "VectorIO" ], - "description": "", + "description": "Insert chunks into a vector database.", "parameters": [], "requestBody": { "content": { @@ -2136,7 +2156,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ProviderInfo object containing the provider's details.", "content": { "application/json": { "schema": { @@ -2161,11 +2181,12 @@ "tags": [ "Providers" ], - "description": "", + "description": "Get detailed information about a specific provider.", "parameters": [ { "name": "provider_id", "in": "path", + "description": "The ID of the provider to inspect.", "required": true, "schema": { "type": "string" @@ -2178,7 +2199,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A ToolInvocationResult.", "content": { "application/json": { "schema": { @@ -2203,7 +2224,7 @@ "tags": [ "ToolRuntime" ], - "description": "Run a tool with the given arguments", + "description": "Run a tool with the given arguments.", "parameters": [], "requestBody": { "content": { @@ -2221,7 +2242,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A PaginatedResponse.", "content": { "application/json": { "schema": { @@ -2246,7 +2267,7 @@ "tags": [ "DatasetIO" ], - "description": "Get a paginated list of rows from a dataset.\nUses offset-based pagination where:\n- start_index: The starting index (0-based). If None, starts from beginning.\n- limit: Number of items to return. 
If None or -1, returns all items.\n\nThe response includes:\n- data: List of items for the current page\n- has_more: Whether there are more items available after this set", + "description": "Get a paginated list of rows from a dataset.\nUses offset-based pagination where:\n- start_index: The starting index (0-based). If None, starts from beginning.\n- limit: Number of items to return. If None or -1, returns all items.\n\nThe response includes:\n- data: List of items for the current page.\n- has_more: Whether there are more items available after this set.", "parameters": [ { "name": "dataset_id", @@ -2282,7 +2303,7 @@ "get": { "responses": { "200": { - "description": "The status of the evaluationjob.", + "description": "The status of the evaluation job.", "content": { "application/json": { "schema": { @@ -2490,7 +2511,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListBenchmarksResponse.", "content": { "application/json": { "schema": { @@ -2515,7 +2536,7 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "List all benchmarks.", "parameters": [] }, "post": { @@ -2539,7 +2560,7 @@ "tags": [ "Benchmarks" ], - "description": "", + "description": "Register a benchmark.", "parameters": [], "requestBody": { "content": { @@ -2557,7 +2578,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListDatasetsResponse.", "content": { "application/json": { "schema": { @@ -2582,13 +2603,13 @@ "tags": [ "Datasets" ], - "description": "", + "description": "List all datasets.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Dataset.", "content": { "application/json": { "schema": { @@ -2631,7 +2652,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListFileResponse.", "content": { "application/json": { "schema": { @@ -2661,7 +2682,7 @@ { "name": "bucket", "in": "path", - "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "description": "Bucket name (valid chars: a-zA-Z0-9_-).", "required": true, "schema": { "type": "string" @@ -2674,7 +2695,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListModelsResponse.", "content": { "application/json": { "schema": { @@ -2699,13 +2720,13 @@ "tags": [ "Models" ], - "description": "", + "description": "List all models.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Model.", "content": { "application/json": { "schema": { @@ -2730,7 +2751,7 @@ "tags": [ "Models" ], - "description": "", + "description": "Register a model.", "parameters": [], "requestBody": { "content": { @@ -2748,7 +2769,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListProvidersResponse containing information about all providers.", "content": { "application/json": { "schema": { @@ -2773,7 +2794,7 @@ "tags": [ "Providers" ], - "description": "", + "description": "List all available providers.", "parameters": [] } }, @@ -2781,7 +2802,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListRoutesResponse.", "content": { "application/json": { "schema": { @@ -2806,7 +2827,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "List all routes.", "parameters": [] } }, @@ -2814,7 +2835,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolDefsResponse.", "content": { "application/json": { "schema": { @@ -2839,11 +2860,12 @@ "tags": [ "ToolRuntime" ], - 
"description": "", + "description": "List all tools in the runtime.", "parameters": [ { "name": "tool_group_id", "in": "query", + "description": "The ID of the tool group to list tools for.", "required": false, "schema": { "type": "string" @@ -2852,6 +2874,7 @@ { "name": "mcp_endpoint", "in": "query", + "description": "The MCP endpoint to use for the tool group.", "required": false, "schema": { "$ref": "#/components/schemas/URL" @@ -2864,7 +2887,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListScoringFunctionsResponse.", "content": { "application/json": { "schema": { @@ -2889,7 +2912,7 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "List all scoring functions.", "parameters": [] }, "post": { @@ -2913,7 +2936,7 @@ "tags": [ "ScoringFunctions" ], - "description": "", + "description": "Register a scoring function.", "parameters": [], "requestBody": { "content": { @@ -2931,7 +2954,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListShieldsResponse.", "content": { "application/json": { "schema": { @@ -2956,13 +2979,13 @@ "tags": [ "Shields" ], - "description": "", + "description": "List all shields.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A Shield.", "content": { "application/json": { "schema": { @@ -2987,7 +3010,7 @@ "tags": [ "Shields" ], - "description": "", + "description": "Register a shield.", "parameters": [], "requestBody": { "content": { @@ -3005,7 +3028,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolGroupsResponse.", "content": { "application/json": { "schema": { @@ -3030,7 +3053,7 @@ "tags": [ "ToolGroups" ], - "description": "List tool groups with optional provider", + "description": "List tool groups with optional provider.", "parameters": [] }, "post": { @@ -3054,7 +3077,7 @@ "tags": [ "ToolGroups" ], - "description": "Register a tool group", + "description": "Register a tool group.", "parameters": [], "requestBody": { "content": { @@ -3072,7 +3095,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListToolsResponse.", "content": { "application/json": { "schema": { @@ -3097,11 +3120,12 @@ "tags": [ "ToolGroups" ], - "description": "List tools with optional tool group", + "description": "List tools with optional tool group.", "parameters": [ { "name": "toolgroup_id", "in": "query", + "description": "The ID of the tool group to list tools for.", "required": false, "schema": { "type": "string" @@ -3114,7 +3138,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A ListVectorDBsResponse.", "content": { "application/json": { "schema": { @@ -3139,13 +3163,13 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "List all vector databases.", "parameters": [] }, "post": { "responses": { "200": { - "description": "OK", + "description": "A VectorDB.", "content": { "application/json": { "schema": { @@ -3170,7 +3194,7 @@ "tags": [ "VectorDBs" ], - "description": "", + "description": "Register a vector database.", "parameters": [], "requestBody": { "content": { @@ -3206,7 +3230,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Log an event.", "parameters": [], "requestBody": { "content": { @@ -3224,7 +3248,7 @@ "post": { "responses": { "200": { - "description": "Response from an OpenAI-compatible chat completion request. 
**OR** Chunk from a streaming response to an OpenAI-compatible chat completion request.", + "description": "An OpenAIChatCompletion.", "content": { "application/json": { "schema": { @@ -3274,7 +3298,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "An OpenAICompletion.", "content": { "application/json": { "schema": { @@ -3317,7 +3341,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A OpenAIListModelsResponse.", "content": { "application/json": { "schema": { @@ -3342,7 +3366,7 @@ "tags": [ "Models" ], - "description": "", + "description": "List models using the OpenAI API.", "parameters": [] } }, @@ -3350,7 +3374,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A PostTrainingJob.", "content": { "application/json": { "schema": { @@ -3375,7 +3399,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Run preference optimization of a model.", "parameters": [], "requestBody": { "content": { @@ -3436,7 +3460,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QueryChunksResponse.", "content": { "application/json": { "schema": { @@ -3461,7 +3485,7 @@ "tags": [ "VectorIO" ], - "description": "", + "description": "Query chunks from a vector database.", "parameters": [], "requestBody": { "content": { @@ -3479,7 +3503,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QueryMetricsResponse.", "content": { "application/json": { "schema": { @@ -3504,11 +3528,12 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Query metrics.", "parameters": [ { "name": "metric_name", "in": "path", + "description": "The name of the metric to query.", "required": true, "schema": { "type": "string" @@ -3531,7 +3556,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QuerySpansResponse.", "content": { "application/json": { "schema": { @@ -3556,7 +3581,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Query spans.", "parameters": [], "requestBody": { "content": { @@ -3574,7 +3599,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A QueryTracesResponse.", "content": { "application/json": { "schema": { @@ -3599,7 +3624,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Query traces.", "parameters": [], "requestBody": { "content": { @@ -3746,7 +3771,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A RunShieldResponse.", "content": { "application/json": { "schema": { @@ -3771,7 +3796,7 @@ "tags": [ "Safety" ], - "description": "", + "description": "Run a shield.", "parameters": [], "requestBody": { "content": { @@ -3807,7 +3832,7 @@ "tags": [ "Telemetry" ], - "description": "", + "description": "Save spans to a dataset.", "parameters": [], "requestBody": { "content": { @@ -3825,7 +3850,7 @@ "post": { "responses": { "200": { - "description": "ScoreResponse object containing rows and aggregated results", + "description": "A ScoreResponse object containing rows and aggregated results.", "content": { "application/json": { "schema": { @@ -3868,7 +3893,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "A ScoreBatchResponse.", "content": { "application/json": { "schema": { @@ -3893,7 +3918,7 @@ "tags": [ "Scoring" ], - "description": "", + "description": "Score a batch of rows.", "parameters": [], "requestBody": { "content": { @@ -3911,7 +3936,7 @@ "post": { "responses": { "200": { - 
"description": "OK", + "description": "A PostTrainingJob.", "content": { "application/json": { "schema": { @@ -3936,7 +3961,7 @@ "tags": [ "PostTraining (Coming Soon)" ], - "description": "", + "description": "Run supervised fine-tuning of a model.", "parameters": [], "requestBody": { "content": { @@ -3997,7 +4022,7 @@ "get": { "responses": { "200": { - "description": "OK", + "description": "A VersionInfo.", "content": { "application/json": { "schema": { @@ -4022,7 +4047,7 @@ "tags": [ "Inspect" ], - "description": "", + "description": "Get the version of the service.", "parameters": [] } } @@ -4088,7 +4113,8 @@ } ] } - } + }, + "description": "The rows to append to the dataset." } }, "additionalProperties": false, @@ -4795,7 +4821,8 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint." }, "messages_batch": { "type": "array", @@ -4804,22 +4831,27 @@ "items": { "$ref": "#/components/schemas/Message" } - } + }, + "description": "The messages to generate completions for." }, "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" + "$ref": "#/components/schemas/SamplingParams", + "description": "(Optional) Parameters to control the sampling strategy." }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" - } + }, + "description": "(Optional) List of tool definitions available to the model." }, "tool_config": { - "$ref": "#/components/schemas/ToolConfig" + "$ref": "#/components/schemas/ToolConfig", + "description": "(Optional) Configuration for tool use." }, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "logprobs": { "type": "object", @@ -4831,7 +4863,7 @@ } }, "additionalProperties": false, - "title": "LogProbConfig" + "description": "(Optional) If specified, log probabilities for each token position will be returned." } }, "additionalProperties": false, @@ -4934,19 +4966,23 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint." }, "content_batch": { "type": "array", "items": { "$ref": "#/components/schemas/InterleavedContent" - } + }, + "description": "The content to generate completions for." }, "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" + "$ref": "#/components/schemas/SamplingParams", + "description": "(Optional) Parameters to control the sampling strategy." }, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "logprobs": { "type": "object", @@ -4958,7 +4994,7 @@ } }, "additionalProperties": false, - "title": "LogProbConfig" + "description": "(Optional) If specified, log probabilities for each token position will be returned." } }, "additionalProperties": false, @@ -5026,7 +5062,8 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to cancel." 
} }, "additionalProperties": false, @@ -5047,18 +5084,18 @@ "items": { "$ref": "#/components/schemas/Message" }, - "description": "List of messages in the conversation" + "description": "List of messages in the conversation." }, "sampling_params": { "$ref": "#/components/schemas/SamplingParams", - "description": "Parameters to control the sampling strategy" + "description": "Parameters to control the sampling strategy." }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" }, - "description": "(Optional) List of tool definitions available to the model" + "description": "(Optional) List of tool definitions available to the model." }, "tool_choice": { "type": "string", @@ -5278,15 +5315,15 @@ }, "content": { "$ref": "#/components/schemas/InterleavedContent", - "description": "The content to generate a completion for" + "description": "The content to generate a completion for." }, "sampling_params": { "$ref": "#/components/schemas/SamplingParams", - "description": "(Optional) Parameters to control the sampling strategy" + "description": "(Optional) Parameters to control the sampling strategy." }, "response_format": { "$ref": "#/components/schemas/ResponseFormat", - "description": "(Optional) Grammar specification for guided (structured) decoding" + "description": "(Optional) Grammar specification for guided (structured) decoding." }, "stream": { "type": "boolean", @@ -7065,19 +7102,19 @@ "properties": { "bucket": { "type": "string", - "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)." }, "key": { "type": "string", - "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)." }, "mime_type": { "type": "string", - "description": "MIME type of the file" + "description": "MIME type of the file." }, "size": { "type": "integer", - "description": "File size in bytes" + "description": "File size in bytes." } }, "additionalProperties": false, @@ -8422,10 +8459,12 @@ "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to return in the tree." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." } }, "additionalProperties": false, @@ -8977,7 +9016,8 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." }, "chunks": { "type": "array", @@ -9019,10 +9059,12 @@ "metadata" ], "title": "Chunk" - } + }, + "description": "The chunks to insert." }, "ttl_seconds": { - "type": "integer" + "type": "integer", + "description": "The time to live of the chunks." } }, "additionalProperties": false, @@ -9109,7 +9151,8 @@ "type": "object", "properties": { "tool_name": { - "type": "string" + "type": "string", + "description": "The name of the tool to invoke." }, "kwargs": { "type": "object", @@ -9134,7 +9177,8 @@ "type": "object" } ] - } + }, + "description": "A dictionary of arguments to pass to the tool." } }, "additionalProperties": false, @@ -9797,10 +9841,12 @@ "type": "object", "properties": { "event": { - "$ref": "#/components/schemas/Event" + "$ref": "#/components/schemas/Event", + "description": "The event to log." }, "ttl_seconds": { - "type": "integer" + "type": "integer", + "description": "The time to live of the event." 
} }, "additionalProperties": false, @@ -10260,11 +10306,11 @@ "items": { "$ref": "#/components/schemas/OpenAIMessageParam" }, - "description": "List of messages in the conversation" + "description": "List of messages in the conversation." }, "frequency_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "function_call": { "oneOf": [ @@ -10297,7 +10343,7 @@ } } ], - "description": "(Optional) The function call to use" + "description": "(Optional) The function call to use." }, "functions": { "type": "array", @@ -10326,46 +10372,46 @@ ] } }, - "description": "(Optional) List of functions to use" + "description": "(Optional) List of functions to use." }, "logit_bias": { "type": "object", "additionalProperties": { "type": "number" }, - "description": "(Optional) The logit bias to use" + "description": "(Optional) The logit bias to use." }, "logprobs": { "type": "boolean", - "description": "(Optional) The log probabilities to use" + "description": "(Optional) The log probabilities to use." }, "max_completion_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "max_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "n": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "parallel_tool_calls": { "type": "boolean", - "description": "(Optional) Whether to parallelize tool calls" + "description": "(Optional) Whether to parallelize tool calls." }, "presence_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "response_format": { "$ref": "#/components/schemas/OpenAIResponseFormatParam", - "description": "(Optional) The response format to use" + "description": "(Optional) The response format to use." }, "seed": { "type": "integer", - "description": "(Optional) The seed to use" + "description": "(Optional) The seed to use." }, "stop": { "oneOf": [ @@ -10379,11 +10425,11 @@ } } ], - "description": "(Optional) The stop tokens to use" + "description": "(Optional) The stop tokens to use." }, "stream": { "type": "boolean", - "description": "(Optional) Whether to stream the response" + "description": "(Optional) Whether to stream the response." }, "stream_options": { "type": "object", @@ -10409,11 +10455,11 @@ } ] }, - "description": "(Optional) The stream options to use" + "description": "(Optional) The stream options to use." }, "temperature": { "type": "number", - "description": "(Optional) The temperature to use" + "description": "(Optional) The temperature to use." }, "tool_choice": { "oneOf": [ @@ -10446,7 +10492,7 @@ } } ], - "description": "(Optional) The tool choice to use" + "description": "(Optional) The tool choice to use." }, "tools": { "type": "array", @@ -10475,19 +10521,19 @@ ] } }, - "description": "(Optional) The tools to use" + "description": "(Optional) The tools to use." }, "top_logprobs": { "type": "integer", - "description": "(Optional) The top log probabilities to use" + "description": "(Optional) The top log probabilities to use." 
}, "top_p": { "type": "number", - "description": "(Optional) The top p to use" + "description": "(Optional) The top p to use." }, "user": { "type": "string", - "description": "(Optional) The user to use" + "description": "(Optional) The user to use." } }, "additionalProperties": false, @@ -10773,46 +10819,46 @@ } } ], - "description": "The prompt to generate a completion for" + "description": "The prompt to generate a completion for." }, "best_of": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "echo": { "type": "boolean", - "description": "(Optional) Whether to echo the prompt" + "description": "(Optional) Whether to echo the prompt." }, "frequency_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "logit_bias": { "type": "object", "additionalProperties": { "type": "number" }, - "description": "(Optional) The logit bias to use" + "description": "(Optional) The logit bias to use." }, "logprobs": { "type": "boolean", - "description": "(Optional) The log probabilities to use" + "description": "(Optional) The log probabilities to use." }, "max_tokens": { "type": "integer", - "description": "(Optional) The maximum number of tokens to generate" + "description": "(Optional) The maximum number of tokens to generate." }, "n": { "type": "integer", - "description": "(Optional) The number of completions to generate" + "description": "(Optional) The number of completions to generate." }, "presence_penalty": { "type": "number", - "description": "(Optional) The penalty for repeated tokens" + "description": "(Optional) The penalty for repeated tokens." }, "seed": { "type": "integer", - "description": "(Optional) The seed to use" + "description": "(Optional) The seed to use." }, "stop": { "oneOf": [ @@ -10826,11 +10872,11 @@ } } ], - "description": "(Optional) The stop tokens to use" + "description": "(Optional) The stop tokens to use." }, "stream": { "type": "boolean", - "description": "(Optional) Whether to stream the response" + "description": "(Optional) Whether to stream the response." }, "stream_options": { "type": "object", @@ -10856,19 +10902,19 @@ } ] }, - "description": "(Optional) The stream options to use" + "description": "(Optional) The stream options to use." }, "temperature": { "type": "number", - "description": "(Optional) The temperature to use" + "description": "(Optional) The temperature to use." }, "top_p": { "type": "number", - "description": "(Optional) The top p to use" + "description": "(Optional) The top p to use." }, "user": { "type": "string", - "description": "(Optional) The user to use" + "description": "(Optional) The user to use." }, "guided_choice": { "type": "array", @@ -11161,16 +11207,20 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to create." }, "finetuned_model": { - "type": "string" + "type": "string", + "description": "The model to fine-tune." }, "algorithm_config": { - "$ref": "#/components/schemas/DPOAlignmentConfig" + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." }, "training_config": { - "$ref": "#/components/schemas/TrainingConfig" + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." 
}, "hyperparam_search_config": { "type": "object", @@ -11195,7 +11245,8 @@ "type": "object" } ] - } + }, + "description": "The hyperparam search configuration." }, "logger_config": { "type": "object", @@ -11220,7 +11271,8 @@ "type": "object" } ] - } + }, + "description": "The logger configuration." } }, "additionalProperties": false, @@ -11405,10 +11457,12 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to query." }, "query": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." }, "params": { "type": "object", @@ -11433,7 +11487,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the query." } }, "additionalProperties": false, @@ -11506,13 +11561,16 @@ "type": "object", "properties": { "start_time": { - "type": "integer" + "type": "integer", + "description": "The start time of the metric to query." }, "end_time": { - "type": "integer" + "type": "integer", + "description": "The end time of the metric to query." }, "granularity": { - "type": "string" + "type": "string", + "description": "The granularity of the metric to query." }, "query_type": { "type": "string", @@ -11520,7 +11578,7 @@ "range", "instant" ], - "title": "MetricQueryType" + "description": "The type of query to perform." }, "label_matchers": { "type": "array", @@ -11552,7 +11610,8 @@ "operator" ], "title": "MetricLabelMatcher" - } + }, + "description": "The label matchers to apply to the metric." } }, "additionalProperties": false, @@ -11696,16 +11755,19 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the spans." }, "attributes_to_return": { "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to return in the spans." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." } }, "additionalProperties": false, @@ -11738,19 +11800,23 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the traces." }, "limit": { - "type": "integer" + "type": "integer", + "description": "The limit of traces to return." }, "offset": { - "type": "integer" + "type": "integer", + "description": "The offset of the traces to return." }, "order_by": { "type": "array", "items": { "type": "string" - } + }, + "description": "The order by of the traces to return." } }, "additionalProperties": false, @@ -11776,22 +11842,27 @@ "type": "object", "properties": { "benchmark_id": { - "type": "string" + "type": "string", + "description": "The ID of the benchmark to register." }, "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to use for the benchmark." }, "scoring_functions": { "type": "array", "items": { "type": "string" - } + }, + "description": "The scoring functions to use for the benchmark." }, "provider_benchmark_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the benchmark." }, "metadata": { "type": "object", @@ -11816,7 +11887,8 @@ "type": "object" } ] - } + }, + "description": "The metadata to use for the benchmark." 
} }, "additionalProperties": false, @@ -11837,7 +11909,7 @@ "eval/question-answer", "eval/messages-answer" ], - "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" }, "source": { "$ref": "#/components/schemas/DataSource", @@ -11867,7 +11939,7 @@ } ] }, - "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}" + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." }, "dataset_id": { "type": "string", @@ -11885,13 +11957,16 @@ "type": "object", "properties": { "model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model to register." }, "provider_model_id": { - "type": "string" + "type": "string", + "description": "The identifier of the model in the provider." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "metadata": { "type": "object", @@ -11916,10 +11991,12 @@ "type": "object" } ] - } + }, + "description": "Any additional metadata for this model." }, "model_type": { - "$ref": "#/components/schemas/ModelType" + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." } }, "additionalProperties": false, @@ -11932,22 +12009,28 @@ "type": "object", "properties": { "scoring_fn_id": { - "type": "string" + "type": "string", + "description": "The ID of the scoring function to register." }, "description": { - "type": "string" + "type": "string", + "description": "The description of the scoring function." }, "return_type": { - "$ref": "#/components/schemas/ParamType" + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." }, "provider_scoring_fn_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." 
}, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the scoring function." }, "params": { - "$ref": "#/components/schemas/ScoringFnParams" + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." } }, "additionalProperties": false, @@ -11962,13 +12045,16 @@ "type": "object", "properties": { "shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield to register." }, "provider_shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield in the provider." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "params": { "type": "object", @@ -11993,7 +12079,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the shield." } }, "additionalProperties": false, @@ -12006,13 +12093,16 @@ "type": "object", "properties": { "toolgroup_id": { - "type": "string" + "type": "string", + "description": "The ID of the tool group to register." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The ID of the provider to use for the tool group." }, "mcp_endpoint": { - "$ref": "#/components/schemas/URL" + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." }, "args": { "type": "object", @@ -12037,7 +12127,8 @@ "type": "object" } ] - } + }, + "description": "A dictionary of arguments to pass to the tool group." } }, "additionalProperties": false, @@ -12051,19 +12142,24 @@ "type": "object", "properties": { "vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database to register." }, "embedding_model": { - "type": "string" + "type": "string", + "description": "The embedding model to use." }, "embedding_dimension": { - "type": "integer" + "type": "integer", + "description": "The dimension of the embedding model." }, "provider_id": { - "type": "string" + "type": "string", + "description": "The identifier of the provider." }, "provider_vector_db_id": { - "type": "string" + "type": "string", + "description": "The identifier of the vector database in the provider." } }, "additionalProperties": false, @@ -12112,13 +12208,15 @@ "type": "object", "properties": { "shield_id": { - "type": "string" + "type": "string", + "description": "The identifier of the shield to run." }, "messages": { "type": "array", "items": { "$ref": "#/components/schemas/Message" - } + }, + "description": "The messages to run the shield on." }, "params": { "type": "object", @@ -12143,7 +12241,8 @@ "type": "object" } ] - } + }, + "description": "The parameters of the shield." } }, "additionalProperties": false, @@ -12171,19 +12270,23 @@ "type": "array", "items": { "$ref": "#/components/schemas/QueryCondition" - } + }, + "description": "The attribute filters to apply to the spans." }, "attributes_to_save": { "type": "array", "items": { "type": "string" - } + }, + "description": "The attributes to save to the dataset." }, "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to save the spans to." }, "max_depth": { - "type": "integer" + "type": "integer", + "description": "The maximum depth of the tree." 
} }, "additionalProperties": false, @@ -12270,7 +12373,8 @@ "type": "object", "properties": { "dataset_id": { - "type": "string" + "type": "string", + "description": "The ID of the dataset to score." }, "scoring_functions": { "type": "object", @@ -12283,10 +12387,12 @@ "type": "null" } ] - } + }, + "description": "The scoring functions to use for the scoring." }, "save_results_dataset": { - "type": "boolean" + "type": "boolean", + "description": "Whether to save the results to a dataset." } }, "additionalProperties": false, @@ -12406,10 +12512,12 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "The UUID of the job to create." }, "training_config": { - "$ref": "#/components/schemas/TrainingConfig" + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." }, "hyperparam_search_config": { "type": "object", @@ -12434,7 +12542,8 @@ "type": "object" } ] - } + }, + "description": "The hyperparam search configuration." }, "logger_config": { "type": "object", @@ -12459,16 +12568,20 @@ "type": "object" } ] - } + }, + "description": "The logger configuration." }, "model": { - "type": "string" + "type": "string", + "description": "The model to fine-tune." }, "checkpoint_dir": { - "type": "string" + "type": "string", + "description": "The directory to save checkpoint(s) to." }, "algorithm_config": { - "$ref": "#/components/schemas/AlgorithmConfig" + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index fb2dbf241..a988e0eab 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -27,10 +27,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - DatasetIO - description: '' + description: Append rows to a dataset. parameters: - name: dataset_id in: path + description: >- + The ID of the dataset to append the rows to. required: true schema: type: string @@ -44,7 +46,8 @@ paths: post: responses: '200': - description: OK + description: >- + A BatchChatCompletionResponse with the full completions. content: application/json: schema: @@ -61,7 +64,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - description: '' + description: >- + Generate chat completions for a batch of messages using the specified model. parameters: [] requestBody: content: @@ -73,7 +77,8 @@ paths: post: responses: '200': - description: OK + description: >- + A BatchCompletionResponse with the full completions. content: application/json: schema: @@ -90,7 +95,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - description: '' + description: >- + Generate completions for a batch of content using the specified model. parameters: [] requestBody: content: @@ -115,7 +121,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Cancel a training job. parameters: [] requestBody: content: @@ -129,7 +135,7 @@ paths: '200': description: >- If stream=False, returns a ChatCompletionResponse with the full completion. - If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk + If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk. content: application/json: schema: @@ -164,7 +170,7 @@ paths: '200': description: >- If stream=False, returns a CompletionResponse with the full completion. 
- If stream=True, returns an SSE event stream of CompletionResponseStreamChunk + If stream=True, returns an SSE event stream of CompletionResponseStreamChunk. content: application/json: schema: @@ -300,7 +306,7 @@ paths: '200': description: >- If stream=False, returns a Turn object. If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk + event stream of AgentTurnResponseStreamChunk. content: application/json: schema: @@ -346,8 +352,7 @@ paths: post: responses: '200': - description: >- - Runtime representation of an annotated type. + description: An OpenAIResponseObject. content: application/json: schema: @@ -379,7 +384,7 @@ paths: get: responses: '200': - description: OK + description: A ListBucketResponse. content: application/json: schema: @@ -400,13 +405,14 @@ paths: parameters: - name: bucket in: query + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string post: responses: '200': - description: OK + description: A FileUploadResponse. content: application/json: schema: @@ -490,7 +496,7 @@ paths: get: responses: '200': - description: OK + description: A Session. content: application/json: schema: @@ -567,7 +573,7 @@ paths: get: responses: '200': - description: OK + description: A FileResponse. content: application/json: schema: @@ -589,14 +595,14 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string - name: key in: path description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). required: true schema: type: string @@ -621,14 +627,14 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' required: true schema: type: string - name: key in: path description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). required: true schema: type: string @@ -639,7 +645,7 @@ paths: description: >- An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you - can check model metadata using /models/{model_id} + can check model metadata using /models/{model_id}. content: application/json: schema: @@ -670,7 +676,7 @@ paths: responses: '200': description: >- - EvaluateResponse object containing generations and scores + EvaluateResponse object containing generations and scores. content: application/json: schema: @@ -796,7 +802,7 @@ paths: get: responses: '200': - description: OK + description: A Benchmark. content: application/json: schema: @@ -813,10 +819,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: Get a benchmark by its ID. parameters: - name: benchmark_id in: path + description: The ID of the benchmark to get. required: true schema: type: string @@ -824,7 +831,7 @@ paths: get: responses: '200': - description: OK + description: A Dataset. content: application/json: schema: @@ -841,10 +848,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: Get a dataset by its ID. parameters: - name: dataset_id in: path + description: The ID of the dataset to get. 
required: true schema: type: string @@ -864,10 +872,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: Unregister a dataset by its ID. parameters: - name: dataset_id in: path + description: The ID of the dataset to unregister. required: true schema: type: string @@ -875,7 +884,7 @@ paths: get: responses: '200': - description: OK + description: A Model. content: application/json: schema: @@ -892,10 +901,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Get a model by its identifier. parameters: - name: model_id in: path + description: The identifier of the model to get. required: true schema: type: string @@ -915,10 +925,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Unregister a model. parameters: - name: model_id in: path + description: >- + The identifier of the model to unregister. required: true schema: type: string @@ -956,7 +968,7 @@ paths: get: responses: '200': - description: OK + description: A ScoringFn. content: application/json: schema: @@ -973,10 +985,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: Get a scoring function by its ID. parameters: - name: scoring_fn_id in: path + description: The ID of the scoring function to get. required: true schema: type: string @@ -984,7 +997,7 @@ paths: get: responses: '200': - description: OK + description: A Shield. content: application/json: schema: @@ -1001,10 +1014,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: Get a shield by its identifier. parameters: - name: identifier in: path + description: The identifier of the shield to get. required: true schema: type: string @@ -1012,7 +1026,7 @@ paths: get: responses: '200': - description: OK + description: A Span. content: application/json: schema: @@ -1029,15 +1043,18 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a span by its ID. parameters: - name: trace_id in: path + description: >- + The ID of the trace to get the span from. required: true schema: type: string - name: span_id in: path + description: The ID of the span to get. required: true schema: type: string @@ -1045,7 +1062,7 @@ paths: post: responses: '200': - description: OK + description: A QuerySpanTreeResponse. content: application/json: schema: @@ -1062,10 +1079,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a span tree by its ID. parameters: - name: span_id in: path + description: The ID of the span to get the tree from. required: true schema: type: string @@ -1079,7 +1097,7 @@ paths: get: responses: '200': - description: OK + description: A Tool. content: application/json: schema: @@ -1096,10 +1114,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: '' + description: Get a tool by its name. parameters: - name: tool_name in: path + description: The name of the tool to get. required: true schema: type: string @@ -1107,7 +1126,7 @@ paths: get: responses: '200': - description: OK + description: A ToolGroup. content: application/json: schema: @@ -1124,10 +1143,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: '' + description: Get a tool group by its ID. 
parameters: - name: toolgroup_id in: path + description: The ID of the tool group to get. required: true schema: type: string @@ -1147,10 +1167,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: Unregister a tool group + description: Unregister a tool group. parameters: - name: toolgroup_id in: path + description: The ID of the tool group to unregister. required: true schema: type: string @@ -1158,7 +1179,7 @@ paths: get: responses: '200': - description: OK + description: A Trace. content: application/json: schema: @@ -1175,10 +1196,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Get a trace by its ID. parameters: - name: trace_id in: path + description: The ID of the trace to get. required: true schema: type: string @@ -1186,7 +1208,7 @@ paths: get: responses: '200': - description: OK + description: A PostTrainingJobArtifactsResponse. content: application/json: schema: @@ -1203,10 +1225,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get the artifacts of a training job. parameters: - name: job_uuid in: query + description: >- The UUID of the job to get the artifacts of. required: true schema: type: string @@ -1214,7 +1238,7 @@ paths: get: responses: '200': - description: OK + description: A PostTrainingJobStatusResponse. content: application/json: schema: @@ -1231,10 +1255,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get the status of a training job. parameters: - name: job_uuid in: query + description: >- The UUID of the job to get the status of. required: true schema: type: string @@ -1242,7 +1268,7 @@ paths: get: responses: '200': - description: OK + description: A ListPostTrainingJobsResponse. content: application/json: schema: @@ -1259,13 +1285,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Get all training jobs. parameters: [] /v1/files/session:{upload_id}: get: responses: '200': - description: OK + description: A FileUploadResponse. content: application/json: schema: @@ -1283,18 +1309,19 @@ paths: tags: - Files description: >- - Returns information about an existsing upload session + Returns information about an existing upload session. parameters: - name: upload_id in: path - description: ID of the upload session + description: ID of the upload session. required: true schema: type: string post: responses: '200': - description: OK + description: >- A FileResponse or None if the upload is not complete. content: application/json: schema: @@ -1319,7 +1346,7 @@ paths: parameters: - name: upload_id in: path - description: ID of the upload session + description: ID of the upload session. required: true schema: type: string @@ -1334,7 +1361,7 @@ paths: get: responses: '200': - description: OK + description: A VectorDB. content: application/json: schema: @@ -1351,10 +1378,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Get a vector database by its identifier. parameters: - name: vector_db_id in: path + description: >- The identifier of the vector database to get. required: true schema: type: string @@ -1374,10 +1403,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Unregister a vector database. 
parameters: - name: vector_db_id in: path + description: >- + The identifier of the vector database to unregister. required: true schema: type: string @@ -1385,7 +1416,7 @@ paths: get: responses: '200': - description: OK + description: A HealthInfo. content: application/json: schema: @@ -1402,7 +1433,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: Get the health of the service. parameters: [] /v1/tool-runtime/rag-tool/insert: post: @@ -1447,7 +1478,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorIO - description: '' + description: Insert chunks into a vector database. parameters: [] requestBody: content: @@ -1459,7 +1490,8 @@ paths: get: responses: '200': - description: OK + description: >- + A ProviderInfo object containing the provider's details. content: application/json: schema: @@ -1476,10 +1508,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - description: '' + description: >- + Get detailed information about a specific provider. parameters: - name: provider_id in: path + description: The ID of the provider to inspect. required: true schema: type: string @@ -1487,7 +1521,7 @@ paths: post: responses: '200': - description: OK + description: A ToolInvocationResult. content: application/json: schema: @@ -1504,7 +1538,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolRuntime - description: Run a tool with the given arguments + description: Run a tool with the given arguments. parameters: [] requestBody: content: @@ -1516,7 +1550,7 @@ paths: get: responses: '200': - description: OK + description: A PaginatedResponse. content: application/json: schema: @@ -1545,9 +1579,9 @@ paths: The response includes: - - data: List of items for the current page + - data: List of items for the current page. - - has_more: Whether there are more items available after this set + - has_more: Whether there are more items available after this set. parameters: - name: dataset_id in: path @@ -1573,7 +1607,7 @@ paths: get: responses: '200': - description: The status of the evaluationjob. + description: The status of the evaluation job. content: application/json: schema: @@ -1718,7 +1752,7 @@ paths: get: responses: '200': - description: OK + description: A ListBenchmarksResponse. content: application/json: schema: @@ -1735,7 +1769,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: List all benchmarks. parameters: [] post: responses: @@ -1753,7 +1787,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - description: '' + description: Register a benchmark. parameters: [] requestBody: content: @@ -1765,7 +1799,7 @@ paths: get: responses: '200': - description: OK + description: A ListDatasetsResponse. content: application/json: schema: @@ -1782,12 +1816,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Datasets - description: '' + description: List all datasets. parameters: [] post: responses: '200': - description: OK + description: A Dataset. content: application/json: schema: @@ -1816,7 +1850,7 @@ paths: get: responses: '200': - description: OK + description: A ListFileResponse. content: application/json: schema: @@ -1837,7 +1871,7 @@ paths: parameters: - name: bucket in: path - description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + description: 'Bucket name (valid chars: a-zA-Z0-9_-).' 
required: true schema: type: string @@ -1845,7 +1879,7 @@ paths: get: responses: '200': - description: OK + description: A ListModelsResponse. content: application/json: schema: @@ -1862,12 +1896,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: List all models. parameters: [] post: responses: '200': - description: OK + description: A Model. content: application/json: schema: @@ -1884,7 +1918,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: Register a model. parameters: [] requestBody: content: @@ -1896,7 +1930,8 @@ paths: get: responses: '200': - description: OK + description: >- + A ListProvidersResponse containing information about all providers. content: application/json: schema: @@ -1913,13 +1948,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - description: '' + description: List all available providers. parameters: [] /v1/inspect/routes: get: responses: '200': - description: OK + description: A ListRoutesResponse. content: application/json: schema: @@ -1936,13 +1971,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: List all routes. parameters: [] /v1/tool-runtime/list-tools: get: responses: '200': - description: OK + description: A ListToolDefsResponse. content: application/json: schema: @@ -1959,15 +1994,19 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolRuntime - description: '' + description: List all tools in the runtime. parameters: - name: tool_group_id in: query + description: >- + The ID of the tool group to list tools for. required: false schema: type: string - name: mcp_endpoint in: query + description: >- + The MCP endpoint to use for the tool group. required: false schema: $ref: '#/components/schemas/URL' @@ -1975,7 +2014,7 @@ paths: get: responses: '200': - description: OK + description: A ListScoringFunctionsResponse. content: application/json: schema: @@ -1992,7 +2031,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: List all scoring functions. parameters: [] post: responses: @@ -2010,7 +2049,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions - description: '' + description: Register a scoring function. parameters: [] requestBody: content: @@ -2022,7 +2061,7 @@ paths: get: responses: '200': - description: OK + description: A ListShieldsResponse. content: application/json: schema: @@ -2039,12 +2078,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: List all shields. parameters: [] post: responses: '200': - description: OK + description: A Shield. content: application/json: schema: @@ -2061,7 +2100,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Shields - description: '' + description: Register a shield. parameters: [] requestBody: content: @@ -2073,7 +2112,7 @@ paths: get: responses: '200': - description: OK + description: A ListToolGroupsResponse. content: application/json: schema: @@ -2090,7 +2129,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: List tool groups with optional provider + description: List tool groups with optional provider. parameters: [] post: responses: @@ -2108,7 +2147,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: Register a tool group + description: Register a tool group. 
parameters: [] requestBody: content: @@ -2120,7 +2159,7 @@ paths: get: responses: '200': - description: OK + description: A ListToolsResponse. content: application/json: schema: @@ -2137,10 +2176,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - ToolGroups - description: List tools with optional tool group + description: List tools with optional tool group. parameters: - name: toolgroup_id in: query + description: >- The ID of the tool group to list tools for. required: false schema: type: string @@ -2148,7 +2189,7 @@ paths: get: responses: '200': - description: OK + description: A ListVectorDBsResponse. content: application/json: schema: @@ -2165,12 +2206,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: List all vector databases. parameters: [] post: responses: '200': - description: OK + description: A VectorDB. content: application/json: schema: @@ -2187,7 +2228,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorDBs - description: '' + description: Register a vector database. parameters: [] requestBody: content: @@ -2212,7 +2253,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Log an event. parameters: [] requestBody: content: @@ -2224,9 +2265,7 @@ paths: post: responses: '200': - description: >- - Response from an OpenAI-compatible chat completion request. **OR** Chunk - from a streaming response to an OpenAI-compatible chat completion request. + description: An OpenAIChatCompletion. content: application/json: schema: @@ -2259,7 +2298,7 @@ paths: post: responses: '200': - description: OK + description: An OpenAICompletion. content: application/json: schema: @@ -2290,7 +2329,7 @@ paths: get: responses: '200': - description: OK + description: An OpenAIListModelsResponse. content: application/json: schema: @@ -2307,13 +2346,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - description: '' + description: List models using the OpenAI API. parameters: [] /v1/post-training/preference-optimize: post: responses: '200': - description: OK + description: A PostTrainingJob. content: application/json: schema: @@ -2330,7 +2369,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Run preference optimization of a model. parameters: [] requestBody: content: @@ -2372,7 +2411,7 @@ paths: post: responses: '200': - description: OK + description: A QueryChunksResponse. content: application/json: schema: @@ -2389,7 +2428,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - VectorIO - description: '' + description: Query chunks from a vector database. parameters: [] requestBody: content: @@ -2401,7 +2440,7 @@ paths: post: responses: '200': - description: OK + description: A QueryMetricsResponse. content: application/json: schema: @@ -2418,10 +2457,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Query metrics. parameters: - name: metric_name in: path + description: The name of the metric to query. required: true schema: type: string @@ -2435,7 +2475,7 @@ paths: post: responses: '200': - description: OK + description: A QuerySpansResponse. content: application/json: schema: @@ -2452,7 +2492,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Query spans. 
parameters: [] requestBody: content: @@ -2464,7 +2504,7 @@ paths: post: responses: '200': - description: OK + description: A QueryTracesResponse. content: application/json: schema: @@ -2481,7 +2521,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Query traces. parameters: [] requestBody: content: @@ -2587,7 +2627,7 @@ paths: post: responses: '200': - description: OK + description: A RunShieldResponse. content: application/json: schema: @@ -2604,7 +2644,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - description: '' + description: Run a shield. parameters: [] requestBody: content: @@ -2629,7 +2669,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Telemetry - description: '' + description: Save spans to a dataset. parameters: [] requestBody: content: @@ -2642,7 +2682,7 @@ paths: responses: '200': description: >- - ScoreResponse object containing rows and aggregated results + A ScoreResponse object containing rows and aggregated results. content: application/json: schema: @@ -2671,7 +2711,7 @@ paths: post: responses: '200': - description: OK + description: A ScoreBatchResponse. content: application/json: schema: @@ -2688,7 +2728,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Scoring - description: '' + description: Score a batch of rows. parameters: [] requestBody: content: @@ -2700,7 +2740,7 @@ paths: post: responses: '200': - description: OK + description: A PostTrainingJob. content: application/json: schema: @@ -2717,7 +2757,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) - description: '' + description: Run supervised fine-tuning of a model. parameters: [] requestBody: content: @@ -2758,7 +2798,7 @@ paths: get: responses: '200': - description: OK + description: A VersionInfo. content: application/json: schema: @@ -2775,7 +2815,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - description: '' + description: Get the version of the service. parameters: [] jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema @@ -2824,6 +2864,7 @@ components: - type: string - type: array - type: object + description: The rows to append to the dataset. additionalProperties: false required: - rows @@ -3328,22 +3369,34 @@ components: properties: model_id: type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. messages_batch: type: array items: type: array items: $ref: '#/components/schemas/Message' + description: >- + The messages to generate completions for. sampling_params: $ref: '#/components/schemas/SamplingParams' + description: >- + (Optional) Parameters to control the sampling strategy. tools: type: array items: $ref: '#/components/schemas/ToolDefinition' + description: >- + (Optional) List of tool definitions available to the model. tool_config: $ref: '#/components/schemas/ToolConfig' + description: (Optional) Configuration for tool use. response_format: $ref: '#/components/schemas/ResponseFormat' + description: >- + (Optional) Grammar specification for guided (structured) decoding. logprobs: type: object properties: @@ -3353,7 +3406,9 @@ components: description: >- How many tokens (for each position) to return log probabilities for. additionalProperties: false - title: LogProbConfig + description: >- + (Optional) If specified, log probabilities for each token position will + be returned. 
additionalProperties: false required: - model_id @@ -3426,14 +3481,22 @@ components: properties: model_id: type: string + description: >- + The identifier of the model to use. The model must be registered with + Llama Stack and available via the /models endpoint. content_batch: type: array items: $ref: '#/components/schemas/InterleavedContent' + description: The content to generate completions for. sampling_params: $ref: '#/components/schemas/SamplingParams' + description: >- + (Optional) Parameters to control the sampling strategy. response_format: $ref: '#/components/schemas/ResponseFormat' + description: >- + (Optional) Grammar specification for guided (structured) decoding. logprobs: type: object properties: @@ -3443,7 +3506,9 @@ components: description: >- How many tokens (for each position) to return log probabilities for. additionalProperties: false - title: LogProbConfig + description: >- + (Optional) If specified, log probabilities for each token position will + be returned. additionalProperties: false required: - model_id @@ -3494,6 +3559,7 @@ components: properties: job_uuid: type: string + description: The UUID of the job to cancel. additionalProperties: false required: - job_uuid @@ -3510,17 +3576,17 @@ components: type: array items: $ref: '#/components/schemas/Message' - description: List of messages in the conversation + description: List of messages in the conversation. sampling_params: $ref: '#/components/schemas/SamplingParams' description: >- - Parameters to control the sampling strategy + Parameters to control the sampling strategy. tools: type: array items: $ref: '#/components/schemas/ToolDefinition' description: >- - (Optional) List of tool definitions available to the model + (Optional) List of tool definitions available to the model. tool_choice: type: string enum: @@ -3703,15 +3769,16 @@ components: Llama Stack and available via the /models endpoint. content: $ref: '#/components/schemas/InterleavedContent' - description: The content to generate a completion for + description: >- + The content to generate a completion for. sampling_params: $ref: '#/components/schemas/SamplingParams' description: >- - (Optional) Parameters to control the sampling strategy + (Optional) Parameters to control the sampling strategy. response_format: $ref: '#/components/schemas/ResponseFormat' description: >- - (Optional) Grammar specification for guided (structured) decoding + (Optional) Grammar specification for guided (structured) decoding. stream: type: boolean description: >- @@ -4939,17 +5006,17 @@ components: bucket: type: string description: >- - Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-). key: type: string description: >- - Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.). mime_type: type: string - description: MIME type of the file + description: MIME type of the file. size: type: integer - description: File size in bytes + description: File size in bytes. additionalProperties: false required: - bucket @@ -5869,8 +5936,10 @@ components: type: array items: type: string + description: The attributes to return in the tree. max_depth: type: integer + description: The maximum depth of the tree. additionalProperties: false title: GetSpanTreeRequest SpanStatus: @@ -6234,6 +6303,8 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to insert the chunks into. 
chunks: type: array items: @@ -6256,8 +6327,10 @@ components: - content - metadata title: Chunk + description: The chunks to insert. ttl_seconds: type: integer + description: The time to live of the chunks. additionalProperties: false required: - vector_db_id @@ -6305,6 +6378,7 @@ components: properties: tool_name: type: string + description: The name of the tool to invoke. kwargs: type: object additionalProperties: @@ -6315,6 +6389,8 @@ components: - type: string - type: array - type: object + description: >- + A dictionary of arguments to pass to the tool. additionalProperties: false required: - tool_name @@ -6750,8 +6826,10 @@ components: properties: event: $ref: '#/components/schemas/Event' + description: The event to log. ttl_seconds: type: integer + description: The time to live of the event. additionalProperties: false required: - event @@ -7077,11 +7155,11 @@ components: type: array items: $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation + description: List of messages in the conversation. frequency_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. function_call: oneOf: - type: string @@ -7094,7 +7172,7 @@ components: - type: string - type: array - type: object - description: (Optional) The function call to use + description: (Optional) The function call to use. functions: type: array items: @@ -7107,52 +7185,52 @@ components: - type: string - type: array - type: object - description: (Optional) List of functions to use + description: (Optional) List of functions to use. logit_bias: type: object additionalProperties: type: number - description: (Optional) The logit bias to use + description: (Optional) The logit bias to use. logprobs: type: boolean - description: (Optional) The log probabilities to use + description: (Optional) The log probabilities to use. max_completion_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. max_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. n: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. parallel_tool_calls: type: boolean description: >- - (Optional) Whether to parallelize tool calls + (Optional) Whether to parallelize tool calls. presence_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. response_format: $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use + description: (Optional) The response format to use. seed: type: integer - description: (Optional) The seed to use + description: (Optional) The seed to use. stop: oneOf: - type: string - type: array items: type: string - description: (Optional) The stop tokens to use + description: (Optional) The stop tokens to use. stream: type: boolean description: >- - (Optional) Whether to stream the response + (Optional) Whether to stream the response. stream_options: type: object additionalProperties: @@ -7163,10 +7241,10 @@ components: - type: string - type: array - type: object - description: (Optional) The stream options to use + description: (Optional) The stream options to use. 
temperature: type: number - description: (Optional) The temperature to use + description: (Optional) The temperature to use. tool_choice: oneOf: - type: string @@ -7179,7 +7257,7 @@ components: - type: string - type: array - type: object - description: (Optional) The tool choice to use + description: (Optional) The tool choice to use. tools: type: array items: @@ -7192,17 +7270,17 @@ components: - type: string - type: array - type: object - description: (Optional) The tools to use + description: (Optional) The tools to use. top_logprobs: type: integer description: >- - (Optional) The top log probabilities to use + (Optional) The top log probabilities to use. top_p: type: number - description: (Optional) The top p to use + description: (Optional) The top p to use. user: type: string - description: (Optional) The user to use + description: (Optional) The user to use. additionalProperties: false required: - model @@ -7432,52 +7510,52 @@ components: type: array items: type: integer - description: The prompt to generate a completion for + description: The prompt to generate a completion for. best_of: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. echo: type: boolean - description: (Optional) Whether to echo the prompt + description: (Optional) Whether to echo the prompt. frequency_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. logit_bias: type: object additionalProperties: type: number - description: (Optional) The logit bias to use + description: (Optional) The logit bias to use. logprobs: type: boolean - description: (Optional) The log probabilities to use + description: (Optional) The log probabilities to use. max_tokens: type: integer description: >- - (Optional) The maximum number of tokens to generate + (Optional) The maximum number of tokens to generate. n: type: integer description: >- - (Optional) The number of completions to generate + (Optional) The number of completions to generate. presence_penalty: type: number description: >- - (Optional) The penalty for repeated tokens + (Optional) The penalty for repeated tokens. seed: type: integer - description: (Optional) The seed to use + description: (Optional) The seed to use. stop: oneOf: - type: string - type: array items: type: string - description: (Optional) The stop tokens to use + description: (Optional) The stop tokens to use. stream: type: boolean description: >- - (Optional) Whether to stream the response + (Optional) Whether to stream the response. stream_options: type: object additionalProperties: @@ -7488,16 +7566,16 @@ components: - type: string - type: array - type: object - description: (Optional) The stream options to use + description: (Optional) The stream options to use. temperature: type: number - description: (Optional) The temperature to use + description: (Optional) The temperature to use. top_p: type: number - description: (Optional) The top p to use + description: (Optional) The top p to use. user: type: string - description: (Optional) The user to use + description: (Optional) The user to use. guided_choice: type: array items: @@ -7713,12 +7791,16 @@ components: properties: job_uuid: type: string + description: The UUID of the job to create. finetuned_model: type: string + description: The model to fine-tune. algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. 
training_config: $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. hyperparam_search_config: type: object additionalProperties: @@ -7729,6 +7811,7 @@ components: - type: string - type: array - type: object + description: The hyperparam search configuration. logger_config: type: object additionalProperties: @@ -7739,6 +7822,7 @@ components: - type: string - type: array - type: object + description: The logger configuration. additionalProperties: false required: - job_uuid @@ -7874,8 +7958,11 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to query. query: $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. params: type: object additionalProperties: @@ -7886,6 +7973,7 @@ components: - type: string - type: array - type: object + description: The parameters of the query. additionalProperties: false required: - vector_db_id @@ -7930,16 +8018,19 @@ components: properties: start_time: type: integer + description: The start time of the metric to query. end_time: type: integer + description: The end time of the metric to query. granularity: type: string + description: The granularity of the metric to query. query_type: type: string enum: - range - instant - title: MetricQueryType + description: The type of query to perform. label_matchers: type: array items: @@ -7964,6 +8055,8 @@ components: - value - operator title: MetricLabelMatcher + description: >- + The label matchers to apply to the metric. additionalProperties: false required: - start_time @@ -8059,12 +8152,16 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. attributes_to_return: type: array items: type: string + description: The attributes to return in the spans. max_depth: type: integer + description: The maximum depth of the tree. additionalProperties: false required: - attribute_filters @@ -8088,14 +8185,19 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. limit: type: integer + description: The limit of traces to return. offset: type: integer + description: The offset of the traces to return. order_by: type: array items: type: string + description: The order by of the traces to return. additionalProperties: false title: QueryTracesRequest QueryTracesResponse: @@ -8114,16 +8216,25 @@ components: properties: benchmark_id: type: string + description: The ID of the benchmark to register. dataset_id: type: string + description: >- + The ID of the dataset to use for the benchmark. scoring_functions: type: array items: type: string + description: >- + The scoring functions to use for the benchmark. provider_benchmark_id: type: string + description: >- + The ID of the provider benchmark to use for the benchmark. provider_id: type: string + description: >- + The ID of the provider to use for the benchmark. metadata: type: object additionalProperties: @@ -8134,6 +8245,7 @@ components: - type: string - type: array - type: object + description: The metadata to use for the benchmark. additionalProperties: false required: - benchmark_id @@ -8150,7 +8262,7 @@ components: - eval/question-answer - eval/messages-answer description: >- - The purpose of the dataset. One of - "post-training/messages": The dataset + The purpose of the dataset. 
One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset @@ -8183,7 +8295,7 @@ components: - type: array - type: object description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"} + The metadata for the dataset. - E.g. {"description": "My dataset"}. dataset_id: type: string description: >- @@ -8198,10 +8310,14 @@ components: properties: model_id: type: string + description: The identifier of the model to register. provider_model_id: type: string + description: >- + The identifier of the model in the provider. provider_id: type: string + description: The identifier of the provider. metadata: type: object additionalProperties: @@ -8212,8 +8328,10 @@ components: - type: string - type: array - type: object + description: Any additional metadata for this model. model_type: $ref: '#/components/schemas/ModelType' + description: The type of model to register. additionalProperties: false required: - model_id @@ -8223,16 +8341,27 @@ components: properties: scoring_fn_id: type: string + description: >- + The ID of the scoring function to register. description: type: string + description: The description of the scoring function. return_type: $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. provider_scoring_fn_id: type: string + description: >- + The ID of the provider scoring function to use for the scoring function. provider_id: type: string + description: >- + The ID of the provider to use for the scoring function. params: $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. additionalProperties: false required: - scoring_fn_id @@ -8244,10 +8373,15 @@ components: properties: shield_id: type: string + description: >- + The identifier of the shield to register. provider_shield_id: type: string + description: >- + The identifier of the shield in the provider. provider_id: type: string + description: The identifier of the provider. params: type: object additionalProperties: @@ -8258,6 +8392,7 @@ components: - type: string - type: array - type: object + description: The parameters of the shield. additionalProperties: false required: - shield_id @@ -8267,10 +8402,15 @@ components: properties: toolgroup_id: type: string + description: The ID of the tool group to register. provider_id: type: string + description: >- + The ID of the provider to use for the tool group. mcp_endpoint: $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. args: type: object additionalProperties: @@ -8281,6 +8421,8 @@ components: - type: string - type: array - type: object + description: >- + A dictionary of arguments to pass to the tool group. additionalProperties: false required: - toolgroup_id @@ -8291,14 +8433,21 @@ components: properties: vector_db_id: type: string + description: >- + The identifier of the vector database to register. embedding_model: type: string + description: The embedding model to use. embedding_dimension: type: integer + description: The dimension of the embedding model. provider_id: type: string + description: The identifier of the provider. provider_vector_db_id: type: string + description: >- + The identifier of the vector database in the provider. 
additionalProperties: false required: - vector_db_id @@ -8335,10 +8484,12 @@ components: properties: shield_id: type: string + description: The identifier of the shield to run. messages: type: array items: $ref: '#/components/schemas/Message' + description: The messages to run the shield on. params: type: object additionalProperties: @@ -8349,6 +8500,7 @@ components: - type: string - type: array - type: object + description: The parameters of the shield. additionalProperties: false required: - shield_id @@ -8369,14 +8521,20 @@ components: type: array items: $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. attributes_to_save: type: array items: type: string + description: The attributes to save to the dataset. dataset_id: type: string + description: >- + The ID of the dataset to save the spans to. max_depth: type: integer + description: The maximum depth of the tree. additionalProperties: false required: - attribute_filters @@ -8431,14 +8589,19 @@ components: properties: dataset_id: type: string + description: The ID of the dataset to score. scoring_functions: type: object additionalProperties: oneOf: - $ref: '#/components/schemas/ScoringFnParams' - type: 'null' + description: >- + The scoring functions to use for the scoring. save_results_dataset: type: boolean + description: >- + Whether to save the results to a dataset. additionalProperties: false required: - dataset_id @@ -8523,8 +8686,10 @@ components: properties: job_uuid: type: string + description: The UUID of the job to create. training_config: $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. hyperparam_search_config: type: object additionalProperties: @@ -8535,6 +8700,7 @@ components: - type: string - type: array - type: object + description: The hyperparam search configuration. logger_config: type: object additionalProperties: @@ -8545,12 +8711,16 @@ components: - type: string - type: array - type: object + description: The logger configuration. model: type: string + description: The model to fine-tune. checkpoint_dir: type: string + description: The directory to save checkpoint(s) to. algorithm_config: $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. additionalProperties: false required: - job_uuid diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index 9bd3cd2dd..12a69050c 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -179,6 +179,35 @@ def _validate_has_ellipsis(method) -> str | None: if "..." not in source and not "NotImplementedError" in source: return "does not contain ellipsis (...) 
in its implementation" +def _validate_has_return_in_docstring(method) -> str | None: + source = inspect.getsource(method) + return_type = method.__annotations__.get('return') + if return_type is not None and return_type != type(None) and ":returns:" not in source: + return "does not have a ':returns:' in its docstring" + +def _validate_has_params_in_docstring(method) -> str | None: + source = inspect.getsource(method) + sig = inspect.signature(method) + # Only check if the method has more than one parameter + if len(sig.parameters) > 1 and ":param" not in source: + return "does not have a ':param' in its docstring" + +def _validate_has_no_return_none_in_docstring(method) -> str | None: + source = inspect.getsource(method) + return_type = method.__annotations__.get('return') + if return_type is None and ":returns: None" in source: + return "has a ':returns: None' in its docstring which is redundant for None-returning functions" + +def _validate_docstring_lines_end_with_dot(method) -> str | None: + docstring = inspect.getdoc(method) + if docstring is None: + return None + + lines = docstring.split('\n') + for line in lines: + line = line.strip() + if line and not any(line.endswith(char) for char in '.:{}[]()",'): + return f"docstring line '{line}' does not end with a valid character: . : {{ }} [ ] ( ) , \"" _VALIDATORS = { "GET": [ @@ -186,13 +215,23 @@ _VALIDATORS = { _validate_list_parameters_contain_data, _validate_api_method_doesnt_return_list, _validate_has_ellipsis, + _validate_has_return_in_docstring, + _validate_has_params_in_docstring, + _validate_docstring_lines_end_with_dot, ], "DELETE": [ _validate_api_delete_method_returns_none, _validate_has_ellipsis, + _validate_has_return_in_docstring, + _validate_has_params_in_docstring, + _validate_has_no_return_none_in_docstring ], "POST": [ _validate_has_ellipsis, + _validate_has_return_in_docstring, + _validate_has_params_in_docstring, + _validate_has_no_return_none_in_docstring, + _validate_docstring_lines_end_with_dot, ], } diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 2a37f27c0..b2f85336c 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -413,7 +413,7 @@ class Agents(Protocol): :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request. :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config. :returns: If stream=False, returns a Turn object. - If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk + If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk. """ ... @@ -509,6 +509,7 @@ class Agents(Protocol): :param session_id: The ID of the session to get. :param agent_id: The ID of the agent to get the session for. :param turn_ids: (Optional) List of turn IDs to filter the session by. + :returns: A Session. """ ... @@ -606,5 +607,6 @@ class Agents(Protocol): :param input: Input message(s) to create the response. :param model: The underlying LLM used for completions. :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. + :returns: An OpenAIResponseObject. """ ... 
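As a quick illustration of how the new docstring validators behave, here is a self-contained sketch of the `:returns:` check applied to two toy methods; the toy functions are illustrative only and not part of this patch.

```
import inspect

def _validate_has_return_in_docstring(method) -> str | None:
    # Same rule as the validator added to utility.py above: a method whose
    # return annotation is neither missing nor None must mention ":returns:".
    source = inspect.getsource(method)
    return_type = method.__annotations__.get('return')
    if return_type is not None and return_type != type(None) and ":returns:" not in source:
        return "does not have a ':returns:' in its docstring"

def documented(x: int) -> int:
    """Double a value.

    :param x: The value to double.
    :returns: The doubled value.
    """
    return 2 * x

def undocumented(x: int) -> int:
    """Double a value."""
    return 2 * x

assert _validate_has_return_in_docstring(documented) is None
assert _validate_has_return_in_docstring(undocumented) is not None
```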
diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py index 79bc73e4c..b2aa637e2 100644 --- a/llama_stack/apis/batch_inference/batch_inference.py +++ b/llama_stack/apis/batch_inference/batch_inference.py @@ -38,7 +38,17 @@ class BatchInference(Protocol): sampling_params: SamplingParams | None = None, response_format: ResponseFormat | None = None, logprobs: LogProbConfig | None = None, - ) -> Job: ... + ) -> Job: + """Generate completions for a batch of content. + + :param model: The model to use for the completion. + :param content_batch: The content to complete. + :param sampling_params: The sampling parameters to use for the completion. + :param response_format: The response format to use for the completion. + :param logprobs: The logprobs to use for the completion. + :returns: A job for the completion. + """ + ... @webmethod(route="/batch-inference/chat-completion", method="POST") async def chat_completion( @@ -52,4 +62,17 @@ class BatchInference(Protocol): tool_prompt_format: ToolPromptFormat | None = None, response_format: ResponseFormat | None = None, logprobs: LogProbConfig | None = None, - ) -> Job: ... + ) -> Job: + """Generate chat completions for a batch of messages. + + :param model: The model to use for the chat completion. + :param messages_batch: The messages to complete. + :param sampling_params: The sampling parameters to use for the completion. + :param tools: The tools to use for the chat completion. + :param tool_choice: The tool choice to use for the chat completion. + :param tool_prompt_format: The tool prompt format to use for the chat completion. + :param response_format: The response format to use for the chat completion. + :param logprobs: The logprobs to use for the chat completion. + :returns: A job for the chat completion. + """ + ... diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py index e3b0502bc..d80c767f8 100644 --- a/llama_stack/apis/benchmarks/benchmarks.py +++ b/llama_stack/apis/benchmarks/benchmarks.py @@ -46,13 +46,24 @@ class ListBenchmarksResponse(BaseModel): @runtime_checkable class Benchmarks(Protocol): @webmethod(route="/eval/benchmarks", method="GET") - async def list_benchmarks(self) -> ListBenchmarksResponse: ... + async def list_benchmarks(self) -> ListBenchmarksResponse: + """List all benchmarks. + + :returns: A ListBenchmarksResponse. + """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET") async def get_benchmark( self, benchmark_id: str, - ) -> Benchmark: ... + ) -> Benchmark: + """Get a benchmark by its ID. + + :param benchmark_id: The ID of the benchmark to get. + :returns: A Benchmark. + """ + ... @webmethod(route="/eval/benchmarks", method="POST") async def register_benchmark( @@ -63,4 +74,14 @@ class Benchmarks(Protocol): provider_benchmark_id: str | None = None, provider_id: str | None = None, metadata: dict[str, Any] | None = None, - ) -> None: ... + ) -> None: + """Register a benchmark. + + :param benchmark_id: The ID of the benchmark to register. + :param dataset_id: The ID of the dataset to use for the benchmark. + :param scoring_functions: The scoring functions to use for the benchmark. + :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + :param provider_id: The ID of the provider to use for the benchmark. + :param metadata: The metadata to use for the benchmark. + """ + ... 
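The `description:` fields added to the OpenAPI YAML earlier in this patch are sourced from `:param`/`:returns:` lines like the ones above. As a rough sketch of that mapping (a simplified illustration, not the generator's actual code):

```
import re

def parse_param_descriptions(docstring: str) -> dict[str, str]:
    # Collect ":param name: text" entries, folding wrapped lines into
    # the preceding parameter's description.
    params: dict[str, str] = {}
    current = None
    for raw_line in docstring.splitlines():
        line = raw_line.strip()
        match = re.match(r":param (\w+): (.*)", line)
        if match:
            current = match.group(1)
            params[current] = match.group(2)
        elif line.startswith(":"):
            current = None  # ":returns:" or another field ends the parameter
        elif current and line:
            params[current] += " " + line
    return params

doc = """Register a benchmark.

:param benchmark_id: The ID of the benchmark to register.
:param dataset_id: The ID of the dataset to use
    for the benchmark.
"""
assert parse_param_descriptions(doc) == {
    "benchmark_id": "The ID of the benchmark to register.",
    "dataset_id": "The ID of the dataset to use for the benchmark.",
}
```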
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 6d160a043..1183983cc 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -34,14 +34,21 @@ class DatasetIO(Protocol): - limit: Number of items to return. If None or -1, returns all items. The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. :param dataset_id: The ID of the dataset to get the rows from. :param start_index: Index into dataset for the first row to get. Get all rows if None. :param limit: The number of rows to get. + :returns: A PaginatedResponse. """ ... @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") - async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: ... + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + """Append rows to a dataset. + + :param dataset_id: The ID of the dataset to append the rows to. + :param rows: The rows to append to the dataset. + """ + ... diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index a0ee987ad..e3de3d5cb 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -137,7 +137,8 @@ class Datasets(Protocol): """ Register a new dataset. - :param purpose: The purpose of the dataset. One of + :param purpose: The purpose of the dataset. + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ @@ -188,8 +189,9 @@ class Datasets(Protocol): ] } :param metadata: The metadata for the dataset. - - E.g. {"description": "My dataset"} + - E.g. {"description": "My dataset"}. :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. + :returns: A Dataset. """ ... @@ -197,13 +199,29 @@ class Datasets(Protocol): async def get_dataset( self, dataset_id: str, - ) -> Dataset: ... + ) -> Dataset: + """Get a dataset by its ID. + + :param dataset_id: The ID of the dataset to get. + :returns: A Dataset. + """ + ... @webmethod(route="/datasets", method="GET") - async def list_datasets(self) -> ListDatasetsResponse: ... + async def list_datasets(self) -> ListDatasetsResponse: + """List all datasets. + + :returns: A ListDatasetsResponse. + """ + ... @webmethod(route="/datasets/{dataset_id:path}", method="DELETE") async def unregister_dataset( self, dataset_id: str, - ) -> None: ... + ) -> None: + """Unregister a dataset by its ID. + + :param dataset_id: The ID of the dataset to unregister. + """ + ... diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 38699d3f5..83a0a8e56 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -93,7 +93,7 @@ class Eval(Protocol): :param benchmark_id: The ID of the benchmark to run the evaluation on. :param benchmark_config: The configuration for the benchmark. - :return: The job that was created to run the evaluation. + :returns: The job that was created to run the evaluation. """ ... @@ -111,7 +111,7 @@ class Eval(Protocol): :param input_rows: The rows to evaluate. :param scoring_functions: The scoring functions to use for the evaluation. :param benchmark_config: The configuration for the benchmark. 
-        :return: EvaluateResponse object containing generations and scores
+        :returns: EvaluateResponse object containing generations and scores.
         """
         ...
 
@@ -121,7 +121,7 @@ class Eval(Protocol):
 
         :param benchmark_id: The ID of the benchmark to run the evaluation on.
         :param job_id: The ID of the job to get the status of.
-        :return: The status of the evaluationjob.
+        :returns: The status of the evaluation job.
         """
         ...
 
@@ -140,6 +140,6 @@ class Eval(Protocol):
 
         :param benchmark_id: The ID of the benchmark to run the evaluation on.
         :param job_id: The ID of the job to get the result of.
-        :return: The result of the job.
+        :returns: The result of the job.
         """
         ...
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
index 4a9b49978..1d762a68a 100644
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@@ -91,10 +91,11 @@ class Files(Protocol):
         """
         Create a new upload session for a file identified by a bucket and key.
 
-        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
-        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
-        :param mime_type: MIME type of the file
-        :param size: File size in bytes
+        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-).
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
+        :param mime_type: MIME type of the file.
+        :param size: File size in bytes.
+        :returns: A FileUploadResponse.
         """
         ...
 
@@ -107,7 +108,8 @@ class Files(Protocol):
         Upload file content to an existing upload session.
         On the server, request body will have the raw bytes that are uploaded.
 
-        :param upload_id: ID of the upload session
+        :param upload_id: ID of the upload session.
+        :returns: A FileResponse or None if the upload is not complete.
         """
         ...
 
@@ -117,9 +119,10 @@ class Files(Protocol):
         upload_id: str,
     ) -> FileUploadResponse:
         """
-        Returns information about an existsing upload session
+        Returns information about an existing upload session.
 
-        :param upload_id: ID of the upload session
+        :param upload_id: ID of the upload session.
+        :returns: A FileUploadResponse.
         """
         ...
 
@@ -130,6 +133,9 @@ class Files(Protocol):
     ) -> ListBucketResponse:
         """
         List all buckets.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
+        :returns: A ListBucketResponse.
         """
         ...
 
@@ -141,7 +147,8 @@ class Files(Protocol):
         """
         List all files in a bucket.
 
-        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
+        :returns: A ListFileResponse.
         """
         ...
 
@@ -154,8 +161,9 @@ class Files(Protocol):
         """
         Get a file info identified by a bucket and key.
 
-        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
-        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
+        :returns: A FileResponse.
         """
         ...
 
@@ -168,7 +176,7 @@ class Files(Protocol):
         """
         Delete a file identified by a bucket and key.
 
-        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
-        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
         """
         ...
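One practical note on the `(valid chars: ...)` constraints documented above: callers can validate bucket and key names before hitting the API. A minimal client-side sketch (this helper is illustrative, not part of the Files API):

```
import re

# Character sets taken from the Files docstrings: buckets allow a-zA-Z0-9_-,
# while keys additionally allow '/' and '.'.
BUCKET_RE = re.compile(r"^[a-zA-Z0-9_-]+$")
KEY_RE = re.compile(r"^[a-zA-Z0-9_\-/.]+$")

def check_file_identifiers(bucket: str, key: str) -> None:
    # Fail fast on the client side instead of waiting for a server error.
    if not BUCKET_RE.fullmatch(bucket):
        raise ValueError(f"invalid bucket name: {bucket!r}")
    if not KEY_RE.fullmatch(key):
        raise ValueError(f"invalid key: {key!r}")

check_file_identifiers("models", "weights/llama3.2-3b.bin")  # passes
```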
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 00050779b..3c91b5a6e 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -845,13 +845,13 @@ class Inference(Protocol): """Generate a completion for the given content using the specified model. :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param content: The content to generate a completion for - :param sampling_params: (Optional) Parameters to control the sampling strategy - :param response_format: (Optional) Grammar specification for guided (structured) decoding + :param content: The content to generate a completion for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. :returns: If stream=False, returns a CompletionResponse with the full completion. - If stream=True, returns an SSE event stream of CompletionResponseStreamChunk + If stream=True, returns an SSE event stream of CompletionResponseStreamChunk. """ ... @@ -864,6 +864,15 @@ class Inference(Protocol): response_format: ResponseFormat | None = None, logprobs: LogProbConfig | None = None, ) -> BatchCompletionResponse: + """Generate completions for a batch of content using the specified model. + + :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param content_batch: The content to generate completions for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. + :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. + :returns: A BatchCompletionResponse with the full completions. + """ raise NotImplementedError("Batch completion is not implemented") @webmethod(route="/inference/chat-completion", method="POST") @@ -883,9 +892,9 @@ class Inference(Protocol): """Generate a chat completion for the given messages using the specified model. :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param messages: List of messages in the conversation - :param sampling_params: Parameters to control the sampling strategy - :param tools: (Optional) List of tool definitions available to the model + :param messages: List of messages in the conversation. + :param sampling_params: Parameters to control the sampling strategy. + :param tools: (Optional) List of tool definitions available to the model. :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -902,7 +911,7 @@ class Inference(Protocol): :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. :param tool_config: (Optional) Configuration for tool use. :returns: If stream=False, returns a ChatCompletionResponse with the full completion. 
- If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk + If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk. """ ... @@ -917,6 +926,17 @@ class Inference(Protocol): response_format: ResponseFormat | None = None, logprobs: LogProbConfig | None = None, ) -> BatchChatCompletionResponse: + """Generate chat completions for a batch of messages using the specified model. + + :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param messages_batch: The messages to generate completions for. + :param sampling_params: (Optional) Parameters to control the sampling strategy. + :param tools: (Optional) List of tool definitions available to the model. + :param tool_config: (Optional) Configuration for tool use. + :param response_format: (Optional) Grammar specification for guided (structured) decoding. + :param logprobs: (Optional) If specified, log probabilities for each token position will be returned. + :returns: A BatchChatCompletionResponse with the full completions. + """ raise NotImplementedError("Batch chat completion is not implemented") @webmethod(route="/inference/embeddings", method="POST") @@ -935,7 +955,7 @@ class Inference(Protocol): :param output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models. :param text_truncation: (Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length. :param task_type: (Optional) How is the embedding being used? This is only supported by asymmetric embedding models. - :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id} + :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}. """ ... @@ -967,22 +987,23 @@ class Inference(Protocol): """Generate an OpenAI-compatible completion for the given prompt using the specified model. :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param prompt: The prompt to generate a completion for - :param best_of: (Optional) The number of completions to generate - :param echo: (Optional) Whether to echo the prompt - :param frequency_penalty: (Optional) The penalty for repeated tokens - :param logit_bias: (Optional) The logit bias to use - :param logprobs: (Optional) The log probabilities to use - :param max_tokens: (Optional) The maximum number of tokens to generate - :param n: (Optional) The number of completions to generate - :param presence_penalty: (Optional) The penalty for repeated tokens - :param seed: (Optional) The seed to use - :param stop: (Optional) The stop tokens to use - :param stream: (Optional) Whether to stream the response - :param stream_options: (Optional) The stream options to use - :param temperature: (Optional) The temperature to use - :param top_p: (Optional) The top p to use - :param user: (Optional) The user to use + :param prompt: The prompt to generate a completion for. + :param best_of: (Optional) The number of completions to generate. + :param echo: (Optional) Whether to echo the prompt. + :param frequency_penalty: (Optional) The penalty for repeated tokens. 
+ :param logit_bias: (Optional) The logit bias to use. + :param logprobs: (Optional) The log probabilities to use. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param top_p: (Optional) The top p to use. + :param user: (Optional) The user to use. + :returns: An OpenAICompletion. """ ... @@ -1016,27 +1037,28 @@ class Inference(Protocol): """Generate an OpenAI-compatible chat completion for the given messages using the specified model. :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - :param messages: List of messages in the conversation - :param frequency_penalty: (Optional) The penalty for repeated tokens - :param function_call: (Optional) The function call to use - :param functions: (Optional) List of functions to use - :param logit_bias: (Optional) The logit bias to use - :param logprobs: (Optional) The log probabilities to use - :param max_completion_tokens: (Optional) The maximum number of tokens to generate - :param max_tokens: (Optional) The maximum number of tokens to generate - :param n: (Optional) The number of completions to generate - :param parallel_tool_calls: (Optional) Whether to parallelize tool calls - :param presence_penalty: (Optional) The penalty for repeated tokens - :param response_format: (Optional) The response format to use - :param seed: (Optional) The seed to use - :param stop: (Optional) The stop tokens to use - :param stream: (Optional) Whether to stream the response - :param stream_options: (Optional) The stream options to use - :param temperature: (Optional) The temperature to use - :param tool_choice: (Optional) The tool choice to use - :param tools: (Optional) The tools to use - :param top_logprobs: (Optional) The top log probabilities to use - :param top_p: (Optional) The top p to use - :param user: (Optional) The user to use + :param messages: List of messages in the conversation. + :param frequency_penalty: (Optional) The penalty for repeated tokens. + :param function_call: (Optional) The function call to use. + :param functions: (Optional) List of functions to use. + :param logit_bias: (Optional) The logit bias to use. + :param logprobs: (Optional) The log probabilities to use. + :param max_completion_tokens: (Optional) The maximum number of tokens to generate. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param parallel_tool_calls: (Optional) Whether to parallelize tool calls. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param response_format: (Optional) The response format to use. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param tool_choice: (Optional) The tool choice to use. + :param tools: (Optional) The tools to use. + :param top_logprobs: (Optional) The top log probabilities to use. 
+        :param top_p: (Optional) The top p to use.
+        :param user: (Optional) The user to use.
+        :returns: An OpenAIChatCompletion.
         """
         ...
diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py
index fb3167635..44a5e95b2 100644
--- a/llama_stack/apis/inspect/inspect.py
+++ b/llama_stack/apis/inspect/inspect.py
@@ -36,10 +36,25 @@ class ListRoutesResponse(BaseModel):
 @runtime_checkable
 class Inspect(Protocol):
     @webmethod(route="/inspect/routes", method="GET")
-    async def list_routes(self) -> ListRoutesResponse: ...
+    async def list_routes(self) -> ListRoutesResponse:
+        """List all routes.
+
+        :returns: A ListRoutesResponse.
+        """
+        ...
 
     @webmethod(route="/health", method="GET")
-    async def health(self) -> HealthInfo: ...
+    async def health(self) -> HealthInfo:
+        """Get the health of the service.
+
+        :returns: A HealthInfo.
+        """
+        ...
 
     @webmethod(route="/version", method="GET")
-    async def version(self) -> VersionInfo: ...
+    async def version(self) -> VersionInfo:
+        """Get the version of the service.
+
+        :returns: A VersionInfo.
+        """
+        ...
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 37ae95fa5..3d90a92a0 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -80,16 +80,32 @@ class OpenAIListModelsResponse(BaseModel):
 @trace_protocol
 class Models(Protocol):
     @webmethod(route="/models", method="GET")
-    async def list_models(self) -> ListModelsResponse: ...
+    async def list_models(self) -> ListModelsResponse:
+        """List all models.
+
+        :returns: A ListModelsResponse.
+        """
+        ...
 
     @webmethod(route="/openai/v1/models", method="GET")
-    async def openai_list_models(self) -> OpenAIListModelsResponse: ...
+    async def openai_list_models(self) -> OpenAIListModelsResponse:
+        """List models using the OpenAI API.
+
+        :returns: An OpenAIListModelsResponse.
+        """
+        ...
 
     @webmethod(route="/models/{model_id:path}", method="GET")
     async def get_model(
         self,
         model_id: str,
-    ) -> Model: ...
+    ) -> Model:
+        """Get a model by its identifier.
+
+        :param model_id: The identifier of the model to get.
+        :returns: A Model.
+        """
+        ...
 
     @webmethod(route="/models", method="POST")
     async def register_model(
@@ -99,10 +115,25 @@
         self,
         model_id: str,
         provider_model_id: str | None = None,
         provider_id: str | None = None,
         metadata: dict[str, Any] | None = None,
         model_type: ModelType | None = None,
-    ) -> Model: ...
+    ) -> Model:
+        """Register a model.
+
+        :param model_id: The identifier of the model to register.
+        :param provider_model_id: The identifier of the model in the provider.
+        :param provider_id: The identifier of the provider.
+        :param metadata: Any additional metadata for this model.
+        :param model_type: The type of model to register.
+        :returns: A Model.
+        """
+        ...
 
     @webmethod(route="/models/{model_id:path}", method="DELETE")
     async def unregister_model(
         self,
         model_id: str,
-    ) -> None: ...
+    ) -> None:
+        """Unregister a model.
+
+        :param model_id: The identifier of the model to unregister.
+        """
+        ...
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index 016f79fce..b196c8a17 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -182,7 +182,19 @@ class PostTraining(Protocol):
         ),
         checkpoint_dir: str | None = None,
         algorithm_config: AlgorithmConfig | None = None,
-    ) -> PostTrainingJob: ...
+    ) -> PostTrainingJob:
+        """Run supervised fine-tuning of a model.
+
+        :param job_uuid: The UUID of the job to create.
+ :param training_config: The training configuration. + :param hyperparam_search_config: The hyperparam search configuration. + :param logger_config: The logger configuration. + :param model: The model to fine-tune. + :param checkpoint_dir: The directory to save checkpoint(s) to. + :param algorithm_config: The algorithm configuration. + :returns: A PostTrainingJob. + """ + ... @webmethod(route="/post-training/preference-optimize", method="POST") async def preference_optimize( @@ -193,16 +205,49 @@ class PostTraining(Protocol): training_config: TrainingConfig, hyperparam_search_config: dict[str, Any], logger_config: dict[str, Any], - ) -> PostTrainingJob: ... + ) -> PostTrainingJob: + """Run preference optimization of a model. + + :param job_uuid: The UUID of the job to create. + :param finetuned_model: The model to fine-tune. + :param algorithm_config: The algorithm configuration. + :param training_config: The training configuration. + :param hyperparam_search_config: The hyperparam search configuration. + :param logger_config: The logger configuration. + :returns: A PostTrainingJob. + """ + ... @webmethod(route="/post-training/jobs", method="GET") - async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ... + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + """Get all training jobs. + + :returns: A ListPostTrainingJobsResponse. + """ + ... @webmethod(route="/post-training/job/status", method="GET") - async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: ... + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: + """Get the status of a training job. + + :param job_uuid: The UUID of the job to get the status of. + :returns: A PostTrainingJobStatusResponse. + """ + ... @webmethod(route="/post-training/job/cancel", method="POST") - async def cancel_training_job(self, job_uuid: str) -> None: ... + async def cancel_training_job(self, job_uuid: str) -> None: + """Cancel a training job. + + :param job_uuid: The UUID of the job to cancel. + """ + ... @webmethod(route="/post-training/job/artifacts", method="GET") - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: ... + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: + """Get the artifacts of a training job. + + :param job_uuid: The UUID of the job to get the artifacts of. + :returns: A PostTrainingJobArtifactsResponse. + """ + ... diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py index 751c9263b..4bc977bf1 100644 --- a/llama_stack/apis/providers/providers.py +++ b/llama_stack/apis/providers/providers.py @@ -32,7 +32,18 @@ class Providers(Protocol): """ @webmethod(route="/providers", method="GET") - async def list_providers(self) -> ListProvidersResponse: ... + async def list_providers(self) -> ListProvidersResponse: + """List all available providers. + + :returns: A ListProvidersResponse containing information about all providers. + """ + ... @webmethod(route="/providers/{provider_id}", method="GET") - async def inspect_provider(self, provider_id: str) -> ProviderInfo: ... + async def inspect_provider(self, provider_id: str) -> ProviderInfo: + """Get detailed information about a specific provider. + + :param provider_id: The ID of the provider to inspect. + :returns: A ProviderInfo object containing the provider's details. + """ + ... 
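Because the post-training API above splits job creation, status, and artifacts into separate calls, a caller typically polls for completion. A hedged sketch, assuming a `client` object that mirrors the protocol's method names and a `status` attribute with terminal values (both assumptions, not defined by this patch):

```
import asyncio

TERMINAL_STATES = {"completed", "failed", "cancelled"}  # assumed status values

async def wait_for_training_job(client, job_uuid: str, poll_seconds: float = 30.0):
    # Poll the documented status endpoint until the job reaches a terminal
    # state, then fetch its artifacts.
    while True:
        status = await client.get_training_job_status(job_uuid=job_uuid)
        if status.status in TERMINAL_STATES:
            break
        await asyncio.sleep(poll_seconds)
    return await client.get_training_job_artifacts(job_uuid=job_uuid)
```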
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index b6b58262f..3aee52b7e 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -54,4 +54,12 @@ class Safety(Protocol): shield_id: str, messages: list[Message], params: dict[str, Any], - ) -> RunShieldResponse: ... + ) -> RunShieldResponse: + """Run a shield. + + :param shield_id: The identifier of the shield to run. + :param messages: The messages to run the shield on. + :param params: The parameters of the shield. + :returns: A RunShieldResponse. + """ + ... diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index 414f3d5e2..732e80e79 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -61,7 +61,15 @@ class Scoring(Protocol): dataset_id: str, scoring_functions: dict[str, ScoringFnParams | None], save_results_dataset: bool = False, - ) -> ScoreBatchResponse: ... + ) -> ScoreBatchResponse: + """Score a batch of rows. + + :param dataset_id: The ID of the dataset to score. + :param scoring_functions: The scoring functions to use for the scoring. + :param save_results_dataset: Whether to save the results to a dataset. + :returns: A ScoreBatchResponse. + """ + ... @webmethod(route="/scoring/score", method="POST") async def score( @@ -73,6 +81,6 @@ class Scoring(Protocol): :param input_rows: The rows to score. :param scoring_functions: The scoring functions to use for the scoring. - :return: ScoreResponse object containing rows and aggregated results + :returns: A ScoreResponse object containing rows and aggregated results. """ ... diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 9ba9eb654..9cd21b7d1 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -134,10 +134,21 @@ class ListScoringFunctionsResponse(BaseModel): @runtime_checkable class ScoringFunctions(Protocol): @webmethod(route="/scoring-functions", method="GET") - async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + """List all scoring functions. + + :returns: A ListScoringFunctionsResponse. + """ + ... @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") - async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: ... + async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: + """Get a scoring function by its ID. + + :param scoring_fn_id: The ID of the scoring function to get. + :returns: A ScoringFn. + """ + ... @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( @@ -148,4 +159,14 @@ class ScoringFunctions(Protocol): provider_scoring_fn_id: str | None = None, provider_id: str | None = None, params: ScoringFnParams | None = None, - ) -> None: ... + ) -> None: + """Register a scoring function. + + :param scoring_fn_id: The ID of the scoring function to register. + :param description: The description of the scoring function. + :param return_type: The return type of the scoring function. + :param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + :param provider_id: The ID of the provider to use for the scoring function. + :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval. + """ + ... 
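The `scoring_functions` argument documented above maps scoring-function IDs to optional parameter overrides, with `None` meaning the function's registered parameters are used. A shape sketch (the ID and row fields below are invented for illustration):

```
# Rows to score; the expected fields depend on the scoring functions used.
input_rows = [
    {"input_query": "What is 2 + 2?", "generated_answer": "4", "expected_answer": "4"},
]

# None means "score with the registered defaults" for that function.
scoring_functions = {
    "basic::equality": None,  # example ID, assumed to be registered
}

# response = await scoring.score(input_rows=input_rows, scoring_functions=scoring_functions)
```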
diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index 66bb9a0b8..ce1f73d8e 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -46,10 +46,21 @@ class ListShieldsResponse(BaseModel): @trace_protocol class Shields(Protocol): @webmethod(route="/shields", method="GET") - async def list_shields(self) -> ListShieldsResponse: ... + async def list_shields(self) -> ListShieldsResponse: + """List all shields. + + :returns: A ListShieldsResponse. + """ + ... @webmethod(route="/shields/{identifier:path}", method="GET") - async def get_shield(self, identifier: str) -> Shield: ... + async def get_shield(self, identifier: str) -> Shield: + """Get a shield by its identifier. + + :param identifier: The identifier of the shield to get. + :returns: A Shield. + """ + ... @webmethod(route="/shields", method="POST") async def register_shield( @@ -58,4 +69,13 @@ class Shields(Protocol): provider_shield_id: str | None = None, provider_id: str | None = None, params: dict[str, Any] | None = None, - ) -> Shield: ... + ) -> Shield: + """Register a shield. + + :param shield_id: The identifier of the shield to register. + :param provider_shield_id: The identifier of the shield in the provider. + :param provider_id: The identifier of the provider. + :param params: The parameters of the shield. + :returns: A Shield. + """ + ... diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 0a3e63a88..0eb53f397 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -247,7 +247,17 @@ class QueryMetricsResponse(BaseModel): @runtime_checkable class Telemetry(Protocol): @webmethod(route="/telemetry/events", method="POST") - async def log_event(self, event: Event, ttl_seconds: int = DEFAULT_TTL_DAYS * 86400) -> None: ... + async def log_event( + self, + event: Event, + ttl_seconds: int = DEFAULT_TTL_DAYS * 86400, + ) -> None: + """Log an event. + + :param event: The event to log. + :param ttl_seconds: The time to live of the event. + """ + ... @webmethod(route="/telemetry/traces", method="POST") async def query_traces( @@ -256,13 +266,35 @@ class Telemetry(Protocol): limit: int | None = 100, offset: int | None = 0, order_by: list[str] | None = None, - ) -> QueryTracesResponse: ... + ) -> QueryTracesResponse: + """Query traces. + + :param attribute_filters: The attribute filters to apply to the traces. + :param limit: The limit of traces to return. + :param offset: The offset of the traces to return. + :param order_by: The order by of the traces to return. + :returns: A QueryTracesResponse. + """ + ... @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET") - async def get_trace(self, trace_id: str) -> Trace: ... + async def get_trace(self, trace_id: str) -> Trace: + """Get a trace by its ID. + + :param trace_id: The ID of the trace to get. + :returns: A Trace. + """ + ... @webmethod(route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET") - async def get_span(self, trace_id: str, span_id: str) -> Span: ... + async def get_span(self, trace_id: str, span_id: str) -> Span: + """Get a span by its ID. + + :param trace_id: The ID of the trace to get the span from. + :param span_id: The ID of the span to get. + :returns: A Span. + """ + ... 
@webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST") async def get_span_tree( @@ -270,7 +302,15 @@ class Telemetry(Protocol): span_id: str, attributes_to_return: list[str] | None = None, max_depth: int | None = None, - ) -> QuerySpanTreeResponse: ... + ) -> QuerySpanTreeResponse: + """Get a span tree by its ID. + + :param span_id: The ID of the span to get the tree from. + :param attributes_to_return: The attributes to return in the tree. + :param max_depth: The maximum depth of the tree. + :returns: A QuerySpanTreeResponse. + """ + ... @webmethod(route="/telemetry/spans", method="POST") async def query_spans( @@ -278,7 +318,15 @@ class Telemetry(Protocol): attribute_filters: list[QueryCondition], attributes_to_return: list[str], max_depth: int | None = None, - ) -> QuerySpansResponse: ... + ) -> QuerySpansResponse: + """Query spans. + + :param attribute_filters: The attribute filters to apply to the spans. + :param attributes_to_return: The attributes to return in the spans. + :param max_depth: The maximum depth of the tree. + :returns: A QuerySpansResponse. + """ + ... @webmethod(route="/telemetry/spans/export", method="POST") async def save_spans_to_dataset( @@ -287,7 +335,15 @@ class Telemetry(Protocol): attributes_to_save: list[str], dataset_id: str, max_depth: int | None = None, - ) -> None: ... + ) -> None: + """Save spans to a dataset. + + :param attribute_filters: The attribute filters to apply to the spans. + :param attributes_to_save: The attributes to save to the dataset. + :param dataset_id: The ID of the dataset to save the spans to. + :param max_depth: The maximum depth of the tree. + """ + ... @webmethod(route="/telemetry/metrics/{metric_name}", method="POST") async def query_metrics( @@ -298,4 +354,15 @@ class Telemetry(Protocol): granularity: str | None = "1d", query_type: MetricQueryType = MetricQueryType.RANGE, label_matchers: list[MetricLabelMatcher] | None = None, - ) -> QueryMetricsResponse: ... + ) -> QueryMetricsResponse: + """Query metrics. + + :param metric_name: The name of the metric to query. + :param start_time: The start time of the metric to query. + :param end_time: The end time of the metric to query. + :param granularity: The granularity of the metric to query. + :param query_type: The type of query to perform. + :param label_matchers: The label matchers to apply to the metric. + :returns: A QueryMetricsResponse. + """ + ... diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index 2860ddbd8..2f62b0ba1 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -103,37 +103,65 @@ class ToolGroups(Protocol): mcp_endpoint: URL | None = None, args: dict[str, Any] | None = None, ) -> None: - """Register a tool group""" + """Register a tool group. + + :param toolgroup_id: The ID of the tool group to register. + :param provider_id: The ID of the provider to use for the tool group. + :param mcp_endpoint: The MCP endpoint to use for the tool group. + :param args: A dictionary of arguments to pass to the tool group. + """ ... @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET") async def get_tool_group( self, toolgroup_id: str, - ) -> ToolGroup: ... + ) -> ToolGroup: + """Get a tool group by its ID. + + :param toolgroup_id: The ID of the tool group to get. + :returns: A ToolGroup. + """ + ... 
@webmethod(route="/toolgroups", method="GET") async def list_tool_groups(self) -> ListToolGroupsResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider. + + :returns: A ListToolGroupsResponse. + """ ... @webmethod(route="/tools", method="GET") async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse: - """List tools with optional tool group""" + """List tools with optional tool group. + + :param toolgroup_id: The ID of the tool group to list tools for. + :returns: A ListToolsResponse. + """ ... @webmethod(route="/tools/{tool_name:path}", method="GET") async def get_tool( self, tool_name: str, - ) -> Tool: ... + ) -> Tool: + """Get a tool by its name. + + :param tool_name: The name of the tool to get. + :returns: A Tool. + """ + ... @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE") async def unregister_toolgroup( self, toolgroup_id: str, ) -> None: - """Unregister a tool group""" + """Unregister a tool group. + + :param toolgroup_id: The ID of the tool group to unregister. + """ ... @@ -152,9 +180,21 @@ class ToolRuntime(Protocol): @webmethod(route="/tool-runtime/list-tools", method="GET") async def list_runtime_tools( self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: ... + ) -> ListToolDefsResponse: + """List all tools in the runtime. + + :param tool_group_id: The ID of the tool group to list tools for. + :param mcp_endpoint: The MCP endpoint to use for the tool group. + :returns: A ListToolDefsResponse. + """ + ... @webmethod(route="/tool-runtime/invoke", method="POST") async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - """Run a tool with the given arguments""" + """Run a tool with the given arguments. + + :param tool_name: The name of the tool to invoke. + :param kwargs: A dictionary of arguments to pass to the tool. + :returns: A ToolInvocationResult. + """ ... diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index a01892888..405852476 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -44,13 +44,24 @@ class ListVectorDBsResponse(BaseModel): @trace_protocol class VectorDBs(Protocol): @webmethod(route="/vector-dbs", method="GET") - async def list_vector_dbs(self) -> ListVectorDBsResponse: ... + async def list_vector_dbs(self) -> ListVectorDBsResponse: + """List all vector databases. + + :returns: A ListVectorDBsResponse. + """ + ... @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET") async def get_vector_db( self, vector_db_id: str, - ) -> VectorDB: ... + ) -> VectorDB: + """Get a vector database by its identifier. + + :param vector_db_id: The identifier of the vector database to get. + :returns: A VectorDB. + """ + ... @webmethod(route="/vector-dbs", method="POST") async def register_vector_db( @@ -60,7 +71,22 @@ class VectorDBs(Protocol): embedding_dimension: int | None = 384, provider_id: str | None = None, provider_vector_db_id: str | None = None, - ) -> VectorDB: ... + ) -> VectorDB: + """Register a vector database. + + :param vector_db_id: The identifier of the vector database to register. + :param embedding_model: The embedding model to use. + :param embedding_dimension: The dimension of the embedding model. + :param provider_id: The identifier of the provider. + :param provider_vector_db_id: The identifier of the vector database in the provider. + :returns: A VectorDB. + """ + ... 
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE") - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_db(self, vector_db_id: str) -> None: + """Unregister a vector database. + + :param vector_db_id: The identifier of the vector database to unregister. + """ + ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index bfae0f802..3ac62d42c 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -46,7 +46,14 @@ class VectorIO(Protocol): vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None, - ) -> None: ... + ) -> None: + """Insert chunks into a vector database. + + :param vector_db_id: The identifier of the vector database to insert the chunks into. + :param chunks: The chunks to insert. + :param ttl_seconds: The time to live of the chunks. + """ + ... @webmethod(route="/vector-io/query", method="POST") async def query_chunks( @@ -54,4 +61,12 @@ class VectorIO(Protocol): vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None, - ) -> QueryChunksResponse: ... + ) -> QueryChunksResponse: + """Query chunks from a vector database. + + :param vector_db_id: The identifier of the vector database to query. + :param query: The query to search for. + :param params: The parameters of the query. + :returns: A QueryChunksResponse. + """ + ... From 10b1056dea6ad80eecb40f3f877c468ea7e2b264 Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Thu, 15 May 2025 14:23:29 -0400 Subject: [PATCH 6/9] fix: multiple tool calls in remote-vllm chat_completion (#2161) # What does this PR do? This fixes an issue in how we used the tool_call_buf from streaming tool calls in the remote-vllm provider where it would end up concatenating parameters from multiple different tool call results instead of aggregating the results from each tool call separately. It also fixes an issue found while digging into that where we were accidentally mixing the json string form of tool call parameters with the string representation of the python form, which mean we'd end up with single quotes in what should be double-quoted json strings. Closes #1120 ## Test Plan The following tests are now passing 100% for the remote-vllm provider, where some of the test_text_inference were failing before this change: ``` VLLM_URL="http://localhost:8000/v1" INFERENCE_MODEL="RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" LLAMA_STACK_CONFIG=remote-vllm python -m pytest -v tests/integration/inference/test_text_inference.py --text-model "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" VLLM_URL="http://localhost:8000/v1" INFERENCE_MODEL="RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" LLAMA_STACK_CONFIG=remote-vllm python -m pytest -v tests/integration/inference/test_vision_inference.py --vision-model "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" ``` All but one of the agent tests are passing (including the multi-tool one). See the PR at https://github.com/vllm-project/vllm/pull/17917 and a gist at https://gist.github.com/bbrowning/4734240ce96b4264340caa9584e47c9e for changes needed there, which will have to get made upstream in vLLM. 
Agent tests:

```
VLLM_URL="http://localhost:8000/v1" INFERENCE_MODEL="RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" LLAMA_STACK_CONFIG=remote-vllm python -m pytest -v tests/integration/agents/test_agents.py --text-model "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic"
```

---------

Signed-off-by: Ben Browning
---
 .../providers/remote/inference/vllm/vllm.py   |  36 ++--
 .../utils/inference/openai_compat.py          |   8 +-
 tests/integration/agents/test_agents.py       |  40 ++--
 .../providers/inference/test_remote_vllm.py   | 175 +++++++++++++++++-
 4 files changed, 225 insertions(+), 34 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 070d94df8..d00218dd5 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -162,7 +162,7 @@ def _process_vllm_chat_completion_end_of_stream(
     finish_reason: str | None,
     last_chunk_content: str | None,
     current_event_type: ChatCompletionResponseEventType,
-    tool_call_buf: UnparseableToolCall,
+    tool_call_bufs: dict[str, UnparseableToolCall] | None = None,
 ) -> list[OpenAIChatCompletionChunk]:
     chunks = []

@@ -171,9 +171,8 @@
     else:
         stop_reason = StopReason.end_of_message

-    if tool_call_buf.tool_name:
-        # at least one tool call request is received
-
+    tool_call_bufs = tool_call_bufs or {}
+    for _index, tool_call_buf in sorted(tool_call_bufs.items()):
         args_str = tool_call_buf.arguments or "{}"
         try:
             args = json.loads(args_str)
@@ -225,8 +224,14 @@ async def _process_vllm_chat_completion_stream_response(
     stream: AsyncGenerator[OpenAIChatCompletionChunk, None],
 ) -> AsyncGenerator:
-    event_type = ChatCompletionResponseEventType.start
-    tool_call_buf = UnparseableToolCall()
+    yield ChatCompletionResponseStreamChunk(
+        event=ChatCompletionResponseEvent(
+            event_type=ChatCompletionResponseEventType.start,
+            delta=TextDelta(text=""),
+        )
+    )
+    event_type = ChatCompletionResponseEventType.progress
+    tool_call_bufs: dict[str, UnparseableToolCall] = {}
     end_of_stream_processed = False

     async for chunk in stream:
@@ -235,17 +240,22 @@
             return
         choice = chunk.choices[0]
         if choice.delta.tool_calls:
-            tool_call = convert_tool_call(choice.delta.tool_calls[0])
-            tool_call_buf.tool_name += str(tool_call.tool_name)
-            tool_call_buf.call_id += tool_call.call_id
-            # TODO: remove str() when dict type for 'arguments' is no longer allowed
-            tool_call_buf.arguments += str(tool_call.arguments)
+            for delta_tool_call in choice.delta.tool_calls:
+                tool_call = convert_tool_call(delta_tool_call)
+                if delta_tool_call.index not in tool_call_bufs:
+                    tool_call_bufs[delta_tool_call.index] = UnparseableToolCall()
+                tool_call_buf = tool_call_bufs[delta_tool_call.index]
+                tool_call_buf.tool_name += str(tool_call.tool_name)
+                tool_call_buf.call_id += tool_call.call_id
+                tool_call_buf.arguments += (
+                    tool_call.arguments if isinstance(tool_call.arguments, str) else json.dumps(tool_call.arguments)
+                )
         if choice.finish_reason:
             chunks = _process_vllm_chat_completion_end_of_stream(
                 finish_reason=choice.finish_reason,
                 last_chunk_content=choice.delta.content,
                 current_event_type=event_type,
-                tool_call_buf=tool_call_buf,
+                tool_call_bufs=tool_call_bufs,
             )
             for c in chunks:
                 yield c
@@ -266,7 +276,7 @@
         # the stream ended without a chunk containing finish_reason -
we have to generate the # respective completion chunks manually chunks = _process_vllm_chat_completion_end_of_stream( - finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_buf=tool_call_buf + finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_bufs=tool_call_bufs ) for c in chunks: yield c diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index e2314d44f..cc0000528 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -531,13 +531,19 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals tool_name = tc.tool_name if isinstance(tool_name, BuiltinTool): tool_name = tool_name.value + + # arguments_json can be None, so attempt it first and fall back to arguments + if hasattr(tc, "arguments_json") and tc.arguments_json: + arguments = tc.arguments_json + else: + arguments = json.dumps(tc.arguments) result["tool_calls"].append( { "id": tc.call_id, "type": "function", "function": { "name": tool_name, - "arguments": tc.arguments_json if hasattr(tc, "arguments_json") else json.dumps(tc.arguments), + "arguments": arguments, }, } ) diff --git a/tests/integration/agents/test_agents.py b/tests/integration/agents/test_agents.py index 63fd74f53..66c9ab829 100644 --- a/tests/integration/agents/test_agents.py +++ b/tests/integration/agents/test_agents.py @@ -266,6 +266,7 @@ def test_builtin_tool_web_search(llama_stack_client, agent_config): assert found_tool_execution +@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack") def test_builtin_tool_code_execution(llama_stack_client, agent_config): agent_config = { **agent_config, @@ -346,7 +347,7 @@ def test_custom_tool(llama_stack_client, agent_config): messages=[ { "role": "user", - "content": "What is the boiling point of polyjuice?", + "content": "What is the boiling point of the liquid polyjuice in celsius?", }, ], session_id=session_id, @@ -420,7 +421,7 @@ def run_agent_with_tool_choice(client, agent_config, tool_choice): messages=[ { "role": "user", - "content": "What is the boiling point of polyjuice?", + "content": "What is the boiling point of the liquid polyjuice in celsius?", }, ], session_id=session_id, @@ -674,8 +675,8 @@ def test_create_turn_response(llama_stack_client, agent_config, client_tools): def test_multi_tool_calls(llama_stack_client, agent_config): - if "gpt" not in agent_config["model"]: - pytest.xfail("Only tested on GPT models") + if "gpt" not in agent_config["model"] and "llama-4" not in agent_config["model"].lower(): + pytest.xfail("Only tested on GPT and Llama 4 models") agent_config = { **agent_config, @@ -689,23 +690,34 @@ def test_multi_tool_calls(llama_stack_client, agent_config): messages=[ { "role": "user", - "content": "Call get_boiling_point twice to answer: What is the boiling point of polyjuice in both celsius and fahrenheit?", + "content": "Call get_boiling_point twice to answer: What is the boiling point of polyjuice in both celsius and fahrenheit?.\nUse the tool responses to answer the question.", }, ], session_id=session_id, stream=False, ) steps = response.steps - assert len(steps) == 7 - assert steps[0].step_type == "shield_call" - assert steps[1].step_type == "inference" - assert steps[2].step_type == "shield_call" - assert steps[3].step_type == "tool_execution" - assert steps[4].step_type == "shield_call" - assert steps[5].step_type == 
"inference" - assert steps[6].step_type == "shield_call" - tool_execution_step = steps[3] + has_input_shield = agent_config.get("input_shields") + has_output_shield = agent_config.get("output_shields") + assert len(steps) == 3 + (2 if has_input_shield else 0) + (2 if has_output_shield else 0) + if has_input_shield: + assert steps[0].step_type == "shield_call" + steps.pop(0) + assert steps[0].step_type == "inference" + if has_output_shield: + assert steps[1].step_type == "shield_call" + steps.pop(1) + assert steps[1].step_type == "tool_execution" + tool_execution_step = steps[1] + if has_input_shield: + assert steps[2].step_type == "shield_call" + steps.pop(2) + assert steps[2].step_type == "inference" + if has_output_shield: + assert steps[3].step_type == "shield_call" + steps.pop(3) + assert len(tool_execution_step.tool_calls) == 2 assert tool_execution_step.tool_calls[0].tool_name.startswith("get_boiling_point") assert tool_execution_step.tool_calls[1].tool_name.startswith("get_boiling_point") diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 6e1623131..f9eaee7d6 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -24,6 +24,12 @@ from openai.types.chat.chat_completion_chunk import ( from openai.types.chat.chat_completion_chunk import ( ChoiceDelta as OpenAIChoiceDelta, ) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall, +) +from openai.types.chat.chat_completion_chunk import ( + ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction, +) from openai.types.model import Model as OpenAIModel from llama_stack.apis.inference import ( @@ -206,8 +212,164 @@ async def test_tool_call_delta_empty_tool_call_buf(): yield chunk chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] - assert len(chunks) == 1 - assert chunks[0].event.stop_reason == StopReason.end_of_turn + assert len(chunks) == 2 + assert chunks[0].event.event_type.value == "start" + assert chunks[1].event.event_type.value == "complete" + assert chunks[1].event.stop_reason == StopReason.end_of_turn + + +@pytest.mark.asyncio +async def test_tool_call_delta_streaming_arguments_dict(): + async def mock_stream(): + mock_chunk_1 = OpenAIChatCompletionChunk( + id="chunk-1", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice( + delta=OpenAIChoiceDelta( + content="", + tool_calls=[ + OpenAIChoiceDeltaToolCall( + id="tc_1", + index=1, + function=OpenAIChoiceDeltaToolCallFunction( + name="power", + arguments="", + ), + ) + ], + ), + finish_reason=None, + index=0, + ) + ], + ) + mock_chunk_2 = OpenAIChatCompletionChunk( + id="chunk-2", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice( + delta=OpenAIChoiceDelta( + content="", + tool_calls=[ + OpenAIChoiceDeltaToolCall( + id="tc_1", + index=1, + function=OpenAIChoiceDeltaToolCallFunction( + name="power", + arguments='{"number": 28, "power": 3}', + ), + ) + ], + ), + finish_reason=None, + index=0, + ) + ], + ) + mock_chunk_3 = OpenAIChatCompletionChunk( + id="chunk-3", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0) + ], + ) + for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]: + yield chunk + + chunks = [chunk async for chunk in 
_process_vllm_chat_completion_stream_response(mock_stream())] + assert len(chunks) == 3 + assert chunks[0].event.event_type.value == "start" + assert chunks[1].event.event_type.value == "progress" + assert chunks[1].event.delta.type == "tool_call" + assert chunks[1].event.delta.parse_status.value == "succeeded" + assert chunks[1].event.delta.tool_call.arguments_json == '{"number": 28, "power": 3}' + assert chunks[2].event.event_type.value == "complete" + + +@pytest.mark.asyncio +async def test_multiple_tool_calls(): + async def mock_stream(): + mock_chunk_1 = OpenAIChatCompletionChunk( + id="chunk-1", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice( + delta=OpenAIChoiceDelta( + content="", + tool_calls=[ + OpenAIChoiceDeltaToolCall( + id="", + index=1, + function=OpenAIChoiceDeltaToolCallFunction( + name="power", + arguments='{"number": 28, "power": 3}', + ), + ), + ], + ), + finish_reason=None, + index=0, + ) + ], + ) + mock_chunk_2 = OpenAIChatCompletionChunk( + id="chunk-2", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice( + delta=OpenAIChoiceDelta( + content="", + tool_calls=[ + OpenAIChoiceDeltaToolCall( + id="", + index=2, + function=OpenAIChoiceDeltaToolCallFunction( + name="multiple", + arguments='{"first_number": 4, "second_number": 7}', + ), + ), + ], + ), + finish_reason=None, + index=0, + ) + ], + ) + mock_chunk_3 = OpenAIChatCompletionChunk( + id="chunk-3", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0) + ], + ) + for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]: + yield chunk + + chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] + assert len(chunks) == 4 + assert chunks[0].event.event_type.value == "start" + assert chunks[1].event.event_type.value == "progress" + assert chunks[1].event.delta.type == "tool_call" + assert chunks[1].event.delta.parse_status.value == "succeeded" + assert chunks[1].event.delta.tool_call.arguments_json == '{"number": 28, "power": 3}' + assert chunks[2].event.event_type.value == "progress" + assert chunks[2].event.delta.type == "tool_call" + assert chunks[2].event.delta.parse_status.value == "succeeded" + assert chunks[2].event.delta.tool_call.arguments_json == '{"first_number": 4, "second_number": 7}' + assert chunks[3].event.event_type.value == "complete" @pytest.mark.asyncio @@ -231,7 +393,8 @@ async def test_process_vllm_chat_completion_stream_response_no_choices(): yield chunk chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] - assert len(chunks) == 0 + assert len(chunks) == 1 + assert chunks[0].event.event_type.value == "start" def test_chat_completion_doesnt_block_event_loop(caplog): @@ -369,7 +532,7 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_ yield chunk chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] - assert len(chunks) == 2 + assert len(chunks) == 3 assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete assert chunks[-2].event.delta.type == "tool_call" assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name @@ -422,7 +585,7 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason(): yield chunk chunks = [chunk async for chunk in 
_process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 2
+    assert len(chunks) == 3
     assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
     assert chunks[-2].event.delta.type == "tool_call"
     assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
@@ -471,7 +634,7 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args():
         yield chunk

     chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 2
+    assert len(chunks) == 3
     assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
     assert chunks[-2].event.delta.type == "tool_call"
     assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name

From 87e284f1a0beb4642a7eba6345b72debdcd99746 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 15 May 2025 12:31:12 -0700
Subject: [PATCH 7/9] chore: update CODEOWNERS

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 54c01c80d..5884f2582 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721 @ehhuang @terrytangyuan @SLR722 @leseb
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning

From 1a6d4af5e9217a253607d263be4186167d40d215 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 15 May 2025 12:52:34 -0700
Subject: [PATCH 8/9] refactor: rename dev distro as starter (#2181)

We want this to be a "flagship" distribution we can advertise to a segment of users so they can get started quickly. This distro should package a bunch of remote providers and some cheap inline providers so users get a solid "AI Platform in a box" setup instantly.
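For anyone who wants to try it, the usual template flow should apply once this lands (a sketch — the exact flags depend on your llama stack CLI version):

```
# build the starter distribution, then run it locally
llama stack build --template starter --image-type venv
llama stack run starter
```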
--- llama_stack/templates/dependencies.json | 80 +++++++++---------- .../templates/{dev => starter}/__init__.py | 2 +- .../templates/{dev => starter}/build.yaml | 2 +- .../templates/{dev => starter}/run.yaml | 16 ++-- .../{dev/dev.py => starter/starter.py} | 7 +- pyproject.toml | 1 - 6 files changed, 54 insertions(+), 54 deletions(-) rename llama_stack/templates/{dev => starter}/__init__.py (76%) rename llama_stack/templates/{dev => starter}/build.yaml (89%) rename llama_stack/templates/{dev => starter}/run.yaml (96%) rename llama_stack/templates/{dev/dev.py => starter/starter.py} (95%) diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index 35cbc8878..d1a17e48e 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -152,46 +152,6 @@ "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], - "dev": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "emoji", - "fastapi", - "fire", - "fireworks-ai", - "httpx", - "langdetect", - "litellm", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pymongo", - "pypdf", - "pythainlp", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "sqlite-vec", - "tqdm", - "transformers", - "tree_sitter", - "uvicorn", - "sentence-transformers --no-deps", - "torch torchvision --index-url https://download.pytorch.org/whl/cpu" - ], "fireworks": [ "aiosqlite", "autoevals", @@ -642,6 +602,46 @@ "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], + "starter": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], "tgi": [ "aiohttp", "aiosqlite", diff --git a/llama_stack/templates/dev/__init__.py b/llama_stack/templates/starter/__init__.py similarity index 76% rename from llama_stack/templates/dev/__init__.py rename to llama_stack/templates/starter/__init__.py index cf966c2a6..9c0d937ce 100644 --- a/llama_stack/templates/dev/__init__.py +++ b/llama_stack/templates/starter/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .dev import get_distribution_template # noqa: F401 +from .starter import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/dev/build.yaml b/llama_stack/templates/starter/build.yaml similarity index 89% rename from llama_stack/templates/dev/build.yaml rename to llama_stack/templates/starter/build.yaml index afa1614bf..35bd0c713 100644 --- a/llama_stack/templates/dev/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,6 +1,6 @@ version: '2' distribution_spec: - description: Distribution for running e2e tests in CI + description: Quick start template for running Llama Stack with several popular providers providers: inference: - remote::openai diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/starter/run.yaml similarity index 96% rename from llama_stack/templates/dev/run.yaml rename to llama_stack/templates/starter/run.yaml index a3b51e7bf..52d7a6a07 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,5 +1,5 @@ version: '2' -image_name: dev +image_name: starter apis: - agents - datasetio @@ -46,7 +46,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - provider_id: ${env.ENABLE_CHROMADB+chromadb} provider_type: remote::chromadb config: @@ -71,14 +71,14 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/trace_store.db + sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -86,7 +86,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -94,14 +94,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -132,7 +132,7 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/starter/starter.py similarity index 95% rename from llama_stack/templates/dev/dev.py rename to llama_stack/templates/starter/starter.py index 76d5a1fb3..0932bfdfe 100644 --- a/llama_stack/templates/dev/dev.py +++ 
b/llama_stack/templates/starter/starter.py
@@ -46,6 +46,7 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.templates.template import (
     DistributionTemplate,
     RunConfigSettings,
@@ -53,7 +54,7 @@ from llama_stack.templates.template import (
 )


-def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]:
+def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
     # in this template, we allow each API key to be optional
     providers = [
         (
@@ -119,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::model-context-protocol",
         ],
     }
-    name = "dev"
+    name = "starter"

     vector_io_providers = [
         Provider(
@@ -171,7 +172,7 @@
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
-        description="Distribution for running e2e tests in CI",
+        description="Quick start template for running Llama Stack with several popular providers",
         container_image=None,
         template_path=None,
         providers=providers,
diff --git a/pyproject.toml b/pyproject.toml
index f1bf7384f..88c331b78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -304,7 +304,6 @@ exclude = [
     "^llama_stack/strong_typing/inspection\\.py$",
     "^llama_stack/strong_typing/schema\\.py$",
     "^llama_stack/strong_typing/serializer\\.py$",
-    "^llama_stack/templates/dev/dev\\.py$",
     "^llama_stack/templates/groq/groq\\.py$",
     "^llama_stack/templates/llama_api/llama_api\\.py$",
     "^llama_stack/templates/sambanova/sambanova\\.py$",

From 3cc15f7d152743d3c12a411334b59ca25628f4b1 Mon Sep 17 00:00:00 2001
From: Sébastien Han
Date: Thu, 15 May 2025 22:03:05 +0200
Subject: [PATCH 9/9] fix: misc UI changes (#2175)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

- Add pre-req to run the server (install deps)
- Fix the static build

Closes: https://github.com/meta-llama/llama-stack/issues/2174

Signed-off-by: Sébastien Han
---
 llama_stack/ui/README.md                  |  8 +++++++-
 llama_stack/ui/app/layout.tsx             | 22 +++++++++++-----------
 llama_stack/ui/components/app-sidebar.tsx | 11 ++++++-----
 llama_stack/ui/package-lock.json          |  6 +++---
 llama_stack/ui/package.json               |  6 +++---
 5 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/llama_stack/ui/README.md b/llama_stack/ui/README.md
index b9f60cdb7..36eee4cff 100644
--- a/llama_stack/ui/README.md
+++ b/llama_stack/ui/README.md
@@ -2,7 +2,13 @@

 ## Getting Started

-First, run the development server:
+First, install dependencies:
+
+```bash
+npm install next react react-dom
+```
+
+Then, run the development server:

 ```bash
 npm run dev

diff --git a/llama_stack/ui/app/layout.tsx b/llama_stack/ui/app/layout.tsx
index 965e1fb8f..a61fff38f 100644
--- a/llama_stack/ui/app/layout.tsx
+++ b/llama_stack/ui/app/layout.tsx
@@ -22,16 +22,16 @@ import { AppSidebar } from "@/components/app-sidebar"

 export default function Layout({ children }: { children: React.ReactNode }) {
   return (
    [JSX lost in extraction: 11 removed / 11 added lines wrapping {children}]
   )
 }

diff --git a/llama_stack/ui/components/app-sidebar.tsx b/llama_stack/ui/components/app-sidebar.tsx
index a8718ad07..b8dd070bb 100644
--- a/llama_stack/ui/components/app-sidebar.tsx
+++ b/llama_stack/ui/components/app-sidebar.tsx
@@ -1,4 +1,5 @@
 import { MessageSquareText, MessagesSquare } from "lucide-react"
+import Link from "next/link"

 import {
   Sidebar,
@@ -29,9 +30,9 @@ const logItems = [

 export function AppSidebar() {
   return (
    [JSX lost in extraction: 5 removed / 5 added lines around "Llama Stack", "Logs", and the logItems.map((item) => ... {item.title} ...) markup]

diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index d8fc13790..7ecaeccc4 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -15,9 +15,9 @@
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "lucide-react": "^0.510.0",
-        "next": "15.3.2",
-        "react": "^19.0.0",
-        "react-dom": "^19.0.0",
+        "next": "^15.3.2",
+        "react": "^19.1.0",
+        "react-dom": "^19.1.0",
         "tailwind-merge": "^3.3.0"
       },
       "devDependencies": {

diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index 4da9923e4..7e6e19976 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -16,9 +16,9 @@
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "lucide-react": "^0.510.0",
-    "next": "15.3.2",
-    "react": "^19.0.0",
-    "react-dom": "^19.0.0",
+    "next": "^15.3.2",
+    "react": "^19.1.0",
+    "react-dom": "^19.1.0",
     "tailwind-merge": "^3.3.0"
   },
   "devDependencies": {
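Since the two JSX hunks above did not survive extraction, here is an illustrative sketch of the pattern the sidebar change implies, based on the `+import Link from "next/link"` line that did survive: swapping plain `<a>` anchors for the Next.js `<Link>` component so navigation stays client-side. The component and prop names below are assumptions for illustration, not the actual diff content:

```tsx
// Illustrative sketch only — names are assumed, not taken from the patch.
// The surviving import suggests anchors like <a href={item.url}> were
// replaced with <Link href={item.url}>, avoiding full page reloads.
import Link from "next/link"

type LogItem = { title: string; url: string }

export function SidebarNavItem({ item }: { item: LogItem }) {
  return (
    <Link href={item.url}>
      <span>{item.title}</span>
    </Link>
  )
}
```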