docs: adding supplementary markdown content to API specs (#3632)

# What does this PR do?

Adds supplementary static content to the root API spec pages. This is useful for giving context on a specific API group, documenting supported features or work in progress, etc. This PR introduces supplementary information for the Agents (experimental, deprecated) and Responses (stable) APIs.

## Test Plan

The documentation server renders rich static content for the Agents API group: ![image.png](https://app.graphite.dev/user-attachments/assets/fc521619-0320-4a22-9409-8ee3fb57ed0e.png)
2025-12-03 09:53:45 +00:00 · 2025-10-01 10:15:30 -07:00 · 2025-10-01 10:15:30 -07:00 · 28bbbcf2c1
commit 28bbbcf2c1
parent b6a5bccadf
10 changed files with 381 additions and 29 deletions
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -548,6 +548,84 @@ class Generator:

        return extra_tags

+    def _get_api_group_for_operation(self, op) -> str | None:
+        """
+        Determine the API group for an operation based on its route path.
+
+        The first path segment after an optional ``v1*`` version prefix names the
+        group: ``/v1/agents/list`` -> ``agents-api``, ``/models`` -> ``models-api``.
+
+        Args:
+            op: The endpoint operation; its ``webmethod.route`` is inspected
+
+        Returns:
+            The API group name derived from the route (``"<segment>-api"``, the
+            supplementary file naming convention), or None if unable to determine
+        """
+        webmethod = getattr(op, 'webmethod', None)
+        route = getattr(webmethod, 'route', None) if webmethod else None
+        if not route or not route.startswith('/'):
+            return None
+
+        # Drop empty segments so "/", "//" or a trailing slash cannot yield a
+        # bogus "-api" group name.
+        path_parts = [part for part in route.split('/') if part]
+        if not path_parts:
+            return None
+
+        # Skip version prefix (v1, v1alpha, v1beta, etc.)
+        if path_parts[0].startswith('v1'):
+            # A bare version such as "/v1" carries no API segment.
+            if len(path_parts) < 2:
+                return None
+            api_segment = path_parts[1]
+        else:
+            api_segment = path_parts[0]
+
+        # Convert to supplementary file naming convention
+        # agents -> agents-api, responses -> responses-api, etc.
+        return f"{api_segment}-api"
+
+    def _load_supplemental_content(self, api_group: str | None) -> str:
+        """
+        Load supplemental content for an API group based on stability level.
+
+        Follows this resolution order:
+        1. docs/supplementary/{stability}/{api_group}.md (only if a stability filter is set)
+        2. docs/supplementary/shared/{api_group}.md (fallback)
+        3. Empty string if no files found
+
+        A candidate that cannot be read is reported with a printed warning and skipped.
+
+        Args:
+            api_group: The API group name (e.g., "agents-api"), or None if no mapping exists
+
+        Returns:
+            The supplemental content as markdown string, or empty string if not found
+        """
+        if not api_group:
+            return ""
+
+        # generator.py sits in docs/openapi_generator/pyopenapi/, so three parents up is docs/.
+        base_path = Path(__file__).parent.parent.parent / "supplementary"
+
+        # Candidates in priority order: stability-specific first, then shared.
+        candidates = []
+        if self.options.stability_filter:
+            candidates.append(("stability-specific", base_path / self.options.stability_filter / f"{api_group}.md"))
+        candidates.append(("shared", base_path / "shared" / f"{api_group}.md"))
+
+        for label, path in candidates:
+            try:
+                return path.read_text(encoding="utf-8")
+            except (FileNotFoundError, NotADirectoryError):
+                continue  # a missing file is the normal "no content" case
+            except (OSError, UnicodeError) as e:
+                print(f"Warning: Could not read {label} supplemental content from {path}: {e}")
+
+        # No supplemental content found
+        return ""
+
    def _build_operation(self, op: EndpointOperation) -> Operation:
        if op.defining_class.__name__ in [
            "SyntheticDataGeneration",
@ -799,10 +877,14 @@ class Generator:
        else:
            callbacks = None

-        description = "\n".join(
+        # Build base description from docstring
+        base_description = "\n".join(
            filter(None, [doc_string.short_description, doc_string.long_description])
        )

+        # Individual endpoints get clean descriptions only
+        description = base_description
+
        return Operation(
            tags=[
                getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)
@ -959,10 +1041,21 @@ class Generator:
            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
                continue

+            # Add supplemental content to tag pages
+            api_group = f"{cls.__name__.lower()}-api"
+            supplemental_content = self._load_supplemental_content(api_group)
+
+            tag_description = doc_string.long_description or ""
+            if supplemental_content:
+                if tag_description:
+                    tag_description = f"{tag_description}\n\n{supplemental_content}"
+                else:
+                    tag_description = supplemental_content
+
            operation_tags.append(
                Tag(
                    name=cls.__name__,
-                    description=doc_string.long_description,
+                    description=tag_description,
                    displayName=doc_string.short_description,
                )
            )
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@ -6282,27 +6282,33 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
            "x-displayName": "Agents"
        },
        {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
+            "description": ""
        },
        {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
+            "description": ""
        },
        {
-            "name": "Datasets"
+            "name": "Datasets",
+            "description": ""
        },
        {
            "name": "Eval",
+            "description": "",
            "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
        },
        {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
+            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -4613,17 +4613,40 @@ security:
  - Default: []
 tags:
  - name: Agents
-    description: >-
+    description: >
      APIs for creating and interacting with agentic systems.
+
+
+      ## Deprecated APIs
+
+
+      > **⚠️ DEPRECATED**: These APIs are provided for migration reference and will
+      be removed in future versions. Not recommended for new projects.
+
+
+      ### Migration Guidance
+
+
+      If you are using deprecated versions of the Agents or Responses APIs, please
+      migrate to:
+
+
+      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
  - name: Benchmarks
+    description: ''
  - name: DatasetIO
+    description: ''
  - name: Datasets
+    description: ''
  - name: Eval
+    description: ''
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: PostTraining (Coming Soon)
+    description: ''
  - name: Telemetry
+    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@ -6479,27 +6479,33 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Agents API (Experimental)\n\n> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.\n\nMain functionalities provided by this API:\n\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.\n\n### 🧪 Feedback Welcome\n\nThis API is actively being developed. We welcome feedback on:\n- API design and usability\n- Performance characteristics\n- Missing features or capabilities\n- Integration patterns\n\n**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)",
            "x-displayName": "Agents"
        },
        {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
+            "description": ""
        },
        {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
+            "description": ""
        },
        {
-            "name": "Datasets"
+            "name": "Datasets",
+            "description": ""
        },
        {
            "name": "Eval",
+            "description": "",
            "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
        },
        {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
+            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -4777,15 +4777,64 @@ tags:
  - name: Agents
    description: >-
      APIs for creating and interacting with agentic systems.
+
+
+      ## Agents API (Experimental)
+
+
+      > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback.
+      Great for exploring new capabilities and providing feedback to influence the
+      final design.
+
+
+      Main functionalities provided by this API:
+
+
+      - Create agents with specific instructions and ability to use tools.
+
+      - Interactions with agents are grouped into sessions ("threads"), and each interaction
+      is called a "turn".
+
+      - Agents can be provided with various tools (see the ToolGroups and ToolRuntime
+      APIs for more details).
+
+      - Agents can be provided with various shields (see the Safety API for more details).
+
+      - Agents can also use Memory to retrieve information from knowledge bases. See
+      the RAG Tool and Vector IO APIs for more details.
+
+
+      ### 🧪 Feedback Welcome
+
+
+      This API is actively being developed. We welcome feedback on:
+
+      - API design and usability
+
+      - Performance characteristics
+
+      - Missing features or capabilities
+
+      - Integration patterns
+
+
+      **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions)
+      or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
    x-displayName: Agents
  - name: Benchmarks
+    description: ''
  - name: DatasetIO
+    description: ''
  - name: Datasets
+    description: ''
  - name: Eval
+    description: ''
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: PostTraining (Coming Soon)
+    description: ''
  - name: Telemetry
+    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -12372,11 +12372,12 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n  - Supports dynamic `vector_store_ids` per call\n  - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
            "x-displayName": "Agents"
        },
        {
-            "name": "Files"
+            "name": "Files",
+            "description": ""
        },
        {
            "name": "Inference",
@ -12384,48 +12385,62 @@
            "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
        },
        {
-            "name": "Inspect"
+            "name": "Inspect",
+            "description": ""
        },
        {
-            "name": "Models"
+            "name": "Models",
+            "description": ""
        },
        {
            "name": "Prompts",
+            "description": "",
            "x-displayName": "Protocol for prompt management operations."
        },
        {
            "name": "Providers",
+            "description": "",
            "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
        },
        {
-            "name": "Safety"
+            "name": "Safety",
+            "description": ""
        },
        {
-            "name": "Scoring"
+            "name": "Scoring",
+            "description": ""
        },
        {
-            "name": "ScoringFunctions"
+            "name": "ScoringFunctions",
+            "description": ""
        },
        {
-            "name": "Shields"
+            "name": "Shields",
+            "description": ""
        },
        {
-            "name": "SyntheticDataGeneration (Coming Soon)"
+            "name": "SyntheticDataGeneration (Coming Soon)",
+            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
+            "description": ""
        },
        {
-            "name": "ToolGroups"
+            "name": "ToolGroups",
+            "description": ""
        },
        {
-            "name": "ToolRuntime"
+            "name": "ToolRuntime",
+            "description": ""
        },
        {
-            "name": "VectorDBs"
+            "name": "VectorDBs",
+            "description": ""
        },
        {
-            "name": "VectorIO"
+            "name": "VectorIO",
+            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -9197,8 +9197,84 @@ tags:
  - name: Agents
    description: >-
      APIs for creating and interacting with agentic systems.
+
+
+      ## Responses API
+
+
+      The Responses API provides OpenAI-compatible functionality with enhanced capabilities
+      for dynamic, stateful interactions.
+
+
+      > **✅ STABLE**: This API is production-ready with backward compatibility guarantees.
+      Recommended for production applications.
+
+
+      ### ✅ Supported Tools
+
+
+      The Responses API supports the following tool types:
+
+
+      - **`web_search`**: Search the web for current information and real-time data
+
+      - **`file_search`**: Search through uploaded files and vector stores
+        - Supports dynamic `vector_store_ids` per call
+        - Compatible with OpenAI file search patterns
+      - **`function`**: Call custom functions with JSON schema validation
+
+      - **`mcp_tool`**: Model Context Protocol integration
+
+
+      ### ✅ Supported Fields & Features
+
+
+      **Core Capabilities:**
+
+      - **Dynamic Configuration**: Switch models, vector stores, and tools per request
+      without pre-configuration
+
+      - **Conversation Branching**: Use `previous_response_id` to branch conversations
+      and explore different paths
+
+      - **Rich Annotations**: Automatic file citations, URL citations, and container
+      file citations
+
+      - **Status Tracking**: Monitor tool call execution status and handle failures
+      gracefully
+
+
+      ### 🚧 Work in Progress
+
+
+      - Full real-time response streaming support
+
+      - `tool_choice` parameter
+
+      - `max_tool_calls` parameter
+
+      - Built-in tools (code interpreter, containers API)
+
+      - Safety & guardrails
+
+      - `reasoning` capabilities
+
+      - `service_tier`
+
+      - `logprobs`
+
+      - `max_output_tokens`
+
+      - `metadata` handling
+
+      - `instructions`
+
+      - `incomplete_details`
+
+      - `background`
    x-displayName: Agents
  - name: Files
+    description: ''
  - name: Inference
    description: >-
      This API provides the raw interface to the underlying models. Two kinds of models
@ -9212,23 +9288,37 @@ tags:
      Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
  - name: Inspect
+    description: ''
  - name: Models
+    description: ''
  - name: Prompts
+    description: ''
    x-displayName: >-
      Protocol for prompt management operations.
  - name: Providers
+    description: ''
    x-displayName: >-
      Providers API for inspecting, listing, and modifying providers and their configurations.
  - name: Safety
+    description: ''
  - name: Scoring
+    description: ''
  - name: ScoringFunctions
+    description: ''
  - name: Shields
+    description: ''
  - name: SyntheticDataGeneration (Coming Soon)
+    description: ''
  - name: Telemetry
+    description: ''
  - name: ToolGroups
+    description: ''
  - name: ToolRuntime
+    description: ''
  - name: VectorDBs
+    description: ''
  - name: VectorIO
+    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/supplementary/deprecated/agents-api.md
+++ b/docs/supplementary/deprecated/agents-api.md
@ -0,0 +1,9 @@
+## Deprecated APIs
+
+> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.
+
+### Migration Guidance
+
+If you are using deprecated versions of the Agents or Responses APIs, please migrate to:
+
+- **Responses API**: Use the stable v1 Responses API endpoints
--- a/docs/supplementary/experimental/agents-api.md
+++ b/docs/supplementary/experimental/agents-api.md
@ -0,0 +1,21 @@
+## Agents API (Experimental)
+
+> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.
+
+Main functionalities provided by this API:
+
+- Create agents with specific instructions and ability to use tools.
+- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
+- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
+- Agents can be provided with various shields (see the Safety API for more details).
+- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
+
+### 🧪 Feedback Welcome
+
+This API is actively being developed. We welcome feedback on:
+- API design and usability
+- Performance characteristics
+- Missing features or capabilities
+- Integration patterns
+
+**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
--- a/docs/supplementary/stable/agents-api.md
+++ b/docs/supplementary/stable/agents-api.md
@ -0,0 +1,40 @@
+## Responses API
+
+The Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.
+
+> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.
+
+### ✅ Supported Tools
+
+The Responses API supports the following tool types:
+
+- **`web_search`**: Search the web for current information and real-time data
+- **`file_search`**: Search through uploaded files and vector stores
+  - Supports dynamic `vector_store_ids` per call
+  - Compatible with OpenAI file search patterns
+- **`function`**: Call custom functions with JSON schema validation
+- **`mcp_tool`**: Model Context Protocol integration
+
+### ✅ Supported Fields & Features
+
+**Core Capabilities:**
+- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration
+- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths
+- **Rich Annotations**: Automatic file citations, URL citations, and container file citations
+- **Status Tracking**: Monitor tool call execution status and handle failures gracefully
+
+### 🚧 Work in Progress
+
+- Full real-time response streaming support
+- `tool_choice` parameter
+- `max_tool_calls` parameter
+- Built-in tools (code interpreter, containers API)
+- Safety & guardrails
+- `reasoning` capabilities
+- `service_tier`
+- `logprobs`
+- `max_output_tokens`
+- `metadata` handling
+- `instructions`
+- `incomplete_details`
+- `background`