From 28bbbcf2c167f3c224e4ad8dea4d0b0dbbc29a5e Mon Sep 17 00:00:00 2001
From: Alexey Rybak <50731695+reluctantfuturist@users.noreply.github.com>
Date: Wed, 1 Oct 2025 10:15:30 -0700
Subject: [PATCH] docs: adding supplementary markdown content to API specs
 (#3632)

# What does this PR do?

Adds supplementary static content to root API spec pages. This is useful for providing context for a specific API group, such as information on supported features or work in progress.

This PR introduces supplementary information for Agents (experimental, deprecated) and Responses (stable) APIs.

<!-- Provide a short summary of what this PR does and why. Link to relevant issues if applicable. -->

<!-- If resolving an issue, uncomment and update the line below -->

<!-- Closes #[issue-number] -->

## Test Plan

Documentation server renders rich static content for the Agents API group:

![image.png](https://app.graphite.dev/user-attachments/assets/fc521619-0320-4a22-9409-8ee3fb57ed0e.png)

<!-- Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.* -->
---
 docs/openapi_generator/pyopenapi/generator.py | 97 ++++++++++++++++++-
 docs/static/deprecated-llama-stack-spec.html  | 18 ++--
 docs/static/deprecated-llama-stack-spec.yaml  | 25 ++++-
 .../static/experimental-llama-stack-spec.html | 18 ++--
 .../static/experimental-llama-stack-spec.yaml | 49 ++++++++++
 docs/static/llama-stack-spec.html             | 43 +++++---
 docs/static/llama-stack-spec.yaml             | 90 +++++++++++++++++
 docs/supplementary/deprecated/agents-api.md   |  9 ++
 docs/supplementary/experimental/agents-api.md | 21 ++++
 docs/supplementary/stable/agents-api.md       | 40 ++++++++
 10 files changed, 381 insertions(+), 29 deletions(-)
 create mode 100644 docs/supplementary/deprecated/agents-api.md
 create mode 100644 docs/supplementary/experimental/agents-api.md
 create mode 100644 docs/supplementary/stable/agents-api.md

diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 2f06b5b41..d3ad2201b 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -548,6 +548,84 @@ class Generator:
 
         return extra_tags
 
+    def _get_api_group_for_operation(self, op) -> str | None:
+        """
+        Derive the supplementary-content group name ("<segment>-api") from an operation's route path.
+
+        Args:
+            op: The endpoint operation; its `webmethod.route` is read when present
+
+        Returns:
+            "<segment>-api" (segment after a leading "v1*" prefix, else the first segment), or None
+        """
+        if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'):
+            return None
+
+        route = op.webmethod.route
+        if not route or not route.startswith('/'):
+            return None
+
+        # Extract API group from route path (routes with fewer than two segments yield None)
+        # Examples: /v1/agents/list -> agents-api
+        #          /v1/responses -> responses-api
+        #          /v1/models -> models-api
+        path_parts = route.strip('/').split('/')
+
+        if len(path_parts) < 2:
+            return None
+
+        # Skip version prefix (v1, v1alpha, v1beta, etc.); prefixes not starting with "v1" are kept
+        if path_parts[0].startswith('v1'):
+            if len(path_parts) < 2:  # NOTE(review): unreachable, the guard above already returned
+                return None
+            api_segment = path_parts[1]
+        else:
+            api_segment = path_parts[0]
+
+        # Convert to supplementary file naming convention
+        # agents -> agents-api, responses -> responses-api, etc.
+        return f"{api_segment}-api"
+
+    def _load_supplemental_content(self, api_group: str | None) -> str:
+        """
+        Load supplemental markdown for an API group, preferring the active stability level.
+
+        Follows this resolution order:
+        1. docs/supplementary/{stability}/{api_group}.md (only when a stability filter is set)
+        2. docs/supplementary/shared/{api_group}.md (fallback)
+        3. Empty string if no files found
+
+        Args:
+            api_group: The API group name (e.g., "agents-api"), or None if no mapping exists
+
+        Returns:
+            The supplemental content as markdown string, or empty string if not found or unreadable
+        """
+        if not api_group:
+            return ""
+
+        base_path = Path(__file__).parent.parent.parent / "supplementary"
+
+        # Try stability-specific content first if stability filter is set
+        if self.options.stability_filter:
+            stability_path = base_path / self.options.stability_filter / f"{api_group}.md"
+            if stability_path.exists():
+                try:
+                    return stability_path.read_text(encoding="utf-8")
+                except Exception as e:  # best-effort: warn, then fall through to shared content
+                    print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}")
+
+        # Fall back to shared content
+        shared_path = base_path / "shared" / f"{api_group}.md"
+        if shared_path.exists():
+            try:
+                return shared_path.read_text(encoding="utf-8")
+            except Exception as e:  # best-effort: warn, then return the empty default below
+                print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}")
+
+        # No supplemental content found
+        return ""
+
     def _build_operation(self, op: EndpointOperation) -> Operation:
         if op.defining_class.__name__ in [
             "SyntheticDataGeneration",
@@ -799,10 +877,14 @@ class Generator:
         else:
             callbacks = None
 
-        description = "\n".join(
+        # Build base description from docstring
+        base_description = "\n".join(
             filter(None, [doc_string.short_description, doc_string.long_description])
         )
 
+        # Individual endpoints get clean descriptions only
+        description = base_description
+
         return Operation(
             tags=[
                 getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)
@@ -959,10 +1041,21 @@ class Generator:
             if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
                 continue
 
+            # Add supplemental content to tag pages
+            api_group = f"{cls.__name__.lower()}-api"
+            supplemental_content = self._load_supplemental_content(api_group)
+
+            tag_description = doc_string.long_description or ""
+            if supplemental_content:
+                if tag_description:
+                    tag_description = f"{tag_description}\n\n{supplemental_content}"
+                else:
+                    tag_description = supplemental_content
+
             operation_tags.append(
                 Tag(
                     name=cls.__name__,
-                    description=doc_string.long_description,
+                    description=tag_description,
                     displayName=doc_string.short_description,
                 )
             )
diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 3e5af5719..21ba4a1de 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -6282,27 +6282,33 @@
     "tags": [
         {
             "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
             "x-displayName": "Agents"
         },
         {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
+            "description": ""
         },
         {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
+            "description": ""
         },
         {
-            "name": "Datasets"
+            "name": "Datasets",
+            "description": ""
         },
         {
             "name": "Eval",
+            "description": "",
             "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
         },
         {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
+            "description": ""
         },
         {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
         }
     ],
     "x-tagGroups": [
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index b7ff528e4..ee8458c4e 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -4613,17 +4613,40 @@ security:
   - Default: []
 tags:
   - name: Agents
-    description: >-
+    description: >
       APIs for creating and interacting with agentic systems.
+
+
+      ## Deprecated APIs
+
+
+      > **⚠️ DEPRECATED**: These APIs are provided for migration reference and will
+      be removed in future versions. Not recommended for new projects.
+
+
+      ### Migration Guidance
+
+
+      If you are using deprecated versions of the Agents or Responses APIs, please
+      migrate to:
+
+
+      - **Responses API**: Use the stable v1 Responses API endpoints
     x-displayName: Agents
   - name: Benchmarks
+    description: ''
   - name: DatasetIO
+    description: ''
   - name: Datasets
+    description: ''
   - name: Eval
+    description: ''
     x-displayName: >-
       Llama Stack Evaluation API for running evaluations on model and agent candidates.
   - name: PostTraining (Coming Soon)
+    description: ''
   - name: Telemetry
+    description: ''
 x-tagGroups:
   - name: Operations
     tags:
diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html
index 811f3d9f5..fe57f9132 100644
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@@ -6479,27 +6479,33 @@
     "tags": [
         {
             "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Agents API (Experimental)\n\n> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.\n\nMain functionalities provided by this API:\n\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.\n\n### 🧪 Feedback Welcome\n\nThis API is actively being developed. We welcome feedback on:\n- API design and usability\n- Performance characteristics\n- Missing features or capabilities\n- Integration patterns\n\n**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)",
             "x-displayName": "Agents"
         },
         {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
+            "description": ""
         },
         {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
+            "description": ""
         },
         {
-            "name": "Datasets"
+            "name": "Datasets",
+            "description": ""
         },
         {
             "name": "Eval",
+            "description": "",
             "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
         },
         {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
+            "description": ""
         },
         {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
         }
     ],
     "x-tagGroups": [
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index 4fda1d1d4..85129336f 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -4777,15 +4777,64 @@ tags:
   - name: Agents
     description: >-
       APIs for creating and interacting with agentic systems.
+
+
+      ## Agents API (Experimental)
+
+
+      > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback.
+      Great for exploring new capabilities and providing feedback to influence the
+      final design.
+
+
+      Main functionalities provided by this API:
+
+
+      - Create agents with specific instructions and ability to use tools.
+
+      - Interactions with agents are grouped into sessions ("threads"), and each interaction
+      is called a "turn".
+
+      - Agents can be provided with various tools (see the ToolGroups and ToolRuntime
+      APIs for more details).
+
+      - Agents can be provided with various shields (see the Safety API for more details).
+
+      - Agents can also use Memory to retrieve information from knowledge bases. See
+      the RAG Tool and Vector IO APIs for more details.
+
+
+      ### 🧪 Feedback Welcome
+
+
+      This API is actively being developed. We welcome feedback on:
+
+      - API design and usability
+
+      - Performance characteristics
+
+      - Missing features or capabilities
+
+      - Integration patterns
+
+
+      **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions)
+      or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
     x-displayName: Agents
   - name: Benchmarks
+    description: ''
   - name: DatasetIO
+    description: ''
   - name: Datasets
+    description: ''
   - name: Eval
+    description: ''
     x-displayName: >-
       Llama Stack Evaluation API for running evaluations on model and agent candidates.
   - name: PostTraining (Coming Soon)
+    description: ''
   - name: Telemetry
+    description: ''
 x-tagGroups:
   - name: Operations
     tags:
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 3c270e23d..fa16e62ee 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -12372,11 +12372,12 @@
     "tags": [
         {
             "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n  - Supports dynamic `vector_store_ids` per call\n  - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
             "x-displayName": "Agents"
         },
         {
-            "name": "Files"
+            "name": "Files",
+            "description": ""
         },
         {
             "name": "Inference",
@@ -12384,48 +12385,62 @@
             "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
         },
         {
-            "name": "Inspect"
+            "name": "Inspect",
+            "description": ""
         },
         {
-            "name": "Models"
+            "name": "Models",
+            "description": ""
         },
         {
             "name": "Prompts",
+            "description": "",
             "x-displayName": "Protocol for prompt management operations."
         },
         {
             "name": "Providers",
+            "description": "",
             "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
         },
         {
-            "name": "Safety"
+            "name": "Safety",
+            "description": ""
         },
         {
-            "name": "Scoring"
+            "name": "Scoring",
+            "description": ""
         },
         {
-            "name": "ScoringFunctions"
+            "name": "ScoringFunctions",
+            "description": ""
         },
         {
-            "name": "Shields"
+            "name": "Shields",
+            "description": ""
         },
         {
-            "name": "SyntheticDataGeneration (Coming Soon)"
+            "name": "SyntheticDataGeneration (Coming Soon)",
+            "description": ""
         },
         {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
         },
         {
-            "name": "ToolGroups"
+            "name": "ToolGroups",
+            "description": ""
         },
         {
-            "name": "ToolRuntime"
+            "name": "ToolRuntime",
+            "description": ""
         },
         {
-            "name": "VectorDBs"
+            "name": "VectorDBs",
+            "description": ""
         },
         {
-            "name": "VectorIO"
+            "name": "VectorIO",
+            "description": ""
         }
     ],
     "x-tagGroups": [
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 2e4cfd60c..733e2cd21 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -9197,8 +9197,84 @@ tags:
   - name: Agents
     description: >-
       APIs for creating and interacting with agentic systems.
+
+
+      ## Responses API
+
+
+      The Responses API provides OpenAI-compatible functionality with enhanced capabilities
+      for dynamic, stateful interactions.
+
+
+      > **✅ STABLE**: This API is production-ready with backward compatibility guarantees.
+      Recommended for production applications.
+
+
+      ### ✅ Supported Tools
+
+
+      The Responses API supports the following tool types:
+
+
+      - **`web_search`**: Search the web for current information and real-time data
+
+      - **`file_search`**: Search through uploaded files and vector stores
+        - Supports dynamic `vector_store_ids` per call
+        - Compatible with OpenAI file search patterns
+      - **`function`**: Call custom functions with JSON schema validation
+
+      - **`mcp_tool`**: Model Context Protocol integration
+
+
+      ### ✅ Supported Fields & Features
+
+
+      **Core Capabilities:**
+
+      - **Dynamic Configuration**: Switch models, vector stores, and tools per request
+      without pre-configuration
+
+      - **Conversation Branching**: Use `previous_response_id` to branch conversations
+      and explore different paths
+
+      - **Rich Annotations**: Automatic file citations, URL citations, and container
+      file citations
+
+      - **Status Tracking**: Monitor tool call execution status and handle failures
+      gracefully
+
+
+      ### 🚧 Work in Progress
+
+
+      - Full real-time response streaming support
+
+      - `tool_choice` parameter
+
+      - `max_tool_calls` parameter
+
+      - Built-in tools (code interpreter, containers API)
+
+      - Safety & guardrails
+
+      - `reasoning` capabilities
+
+      - `service_tier`
+
+      - `logprobs`
+
+      - `max_output_tokens`
+
+      - `metadata` handling
+
+      - `instructions`
+
+      - `incomplete_details`
+
+      - `background`
     x-displayName: Agents
   - name: Files
+    description: ''
   - name: Inference
     description: >-
       This API provides the raw interface to the underlying models. Two kinds of models
@@ -9212,23 +9288,37 @@ tags:
       Llama Stack Inference API for generating completions, chat completions, and
       embeddings.
   - name: Inspect
+    description: ''
   - name: Models
+    description: ''
   - name: Prompts
+    description: ''
     x-displayName: >-
       Protocol for prompt management operations.
   - name: Providers
+    description: ''
     x-displayName: >-
       Providers API for inspecting, listing, and modifying providers and their configurations.
   - name: Safety
+    description: ''
   - name: Scoring
+    description: ''
   - name: ScoringFunctions
+    description: ''
   - name: Shields
+    description: ''
   - name: SyntheticDataGeneration (Coming Soon)
+    description: ''
   - name: Telemetry
+    description: ''
   - name: ToolGroups
+    description: ''
   - name: ToolRuntime
+    description: ''
   - name: VectorDBs
+    description: ''
   - name: VectorIO
+    description: ''
 x-tagGroups:
   - name: Operations
     tags:
diff --git a/docs/supplementary/deprecated/agents-api.md b/docs/supplementary/deprecated/agents-api.md
new file mode 100644
index 000000000..ddbf8f871
--- /dev/null
+++ b/docs/supplementary/deprecated/agents-api.md
@@ -0,0 +1,9 @@
+## Deprecated APIs
+
+> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.
+
+### Migration Guidance
+
+If you are using deprecated versions of the Agents or Responses APIs, please migrate to:
+
+- **Responses API**: Use the stable v1 Responses API endpoints
diff --git a/docs/supplementary/experimental/agents-api.md b/docs/supplementary/experimental/agents-api.md
new file mode 100644
index 000000000..9737b6aba
--- /dev/null
+++ b/docs/supplementary/experimental/agents-api.md
@@ -0,0 +1,21 @@
+## Agents API (Experimental)
+
+> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.
+
+Main functionalities provided by this API:
+
+- Create agents with specific instructions and ability to use tools.
+- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
+- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
+- Agents can be provided with various shields (see the Safety API for more details).
+- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
+
+### 🧪 Feedback Welcome
+
+This API is actively being developed. We welcome feedback on:
+- API design and usability
+- Performance characteristics
+- Missing features or capabilities
+- Integration patterns
+
+**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
\ No newline at end of file
diff --git a/docs/supplementary/stable/agents-api.md b/docs/supplementary/stable/agents-api.md
new file mode 100644
index 000000000..e2011f7a7
--- /dev/null
+++ b/docs/supplementary/stable/agents-api.md
@@ -0,0 +1,40 @@
+## Responses API
+
+The Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.
+
+> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.
+
+### ✅ Supported Tools
+
+The Responses API supports the following tool types:
+
+- **`web_search`**: Search the web for current information and real-time data
+- **`file_search`**: Search through uploaded files and vector stores
+  - Supports dynamic `vector_store_ids` per call
+  - Compatible with OpenAI file search patterns
+- **`function`**: Call custom functions with JSON schema validation
+- **`mcp_tool`**: Model Context Protocol integration
+
+### ✅ Supported Fields & Features
+
+**Core Capabilities:**
+- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration
+- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths
+- **Rich Annotations**: Automatic file citations, URL citations, and container file citations
+- **Status Tracking**: Monitor tool call execution status and handle failures gracefully
+
+### 🚧 Work in Progress
+
+- Full real-time response streaming support
+- `tool_choice` parameter
+- `max_tool_calls` parameter
+- Built-in tools (code interpreter, containers API)
+- Safety & guardrails
+- `reasoning` capabilities
+- `service_tier`
+- `logprobs`
+- `max_output_tokens`
+- `metadata` handling
+- `instructions`
+- `incomplete_details`
+- `background`
\ No newline at end of file