docs: adding supplementary markdown content to API specs (#3632)

# What does this PR do?

Adds supplementary static content to the root API spec pages. This is useful for giving context on a specific API group, documenting supported features or work in progress, etc. This PR introduces supplementary information for the Agents (experimental, deprecated) and Responses (stable) APIs.

## Test Plan

The documentation server renders rich static content for the Agents API group: ![image.png](https://app.graphite.dev/user-attachments/assets/fc521619-0320-4a22-9409-8ee3fb57ed0e.png)
2025-12-04 10:10:36 +00:00 · 2025-10-01 10:15:30 -07:00 · 2025-10-01 10:15:30 -07:00 · 28bbbcf2c1
commit 28bbbcf2c1
parent b6a5bccadf
10 changed files with 381 additions and 29 deletions
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -548,6 +548,84 @@ class Generator:
        return extra_tags
    def _get_api_group_for_operation(self, op) -> str | None:
        """
        Determine the API group for an operation based on its route path.
        Args:
            op: The endpoint operation
        Returns:
            The API group name derived from the route, or None if unable to determine
        """
        if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'):
            return None
        route = op.webmethod.route
        if not route or not route.startswith('/'):
            return None
        # Extract API group from route path
        # Examples: /v1/agents/list -> agents-api
        #          /v1/responses -> responses-api
        #          /v1/models -> models-api
        path_parts = route.strip('/').split('/')
        if len(path_parts) < 2:
            return None
        # Skip version prefix (v1, v1alpha, v1beta, etc.)
        if path_parts[0].startswith('v1'):
            if len(path_parts) < 2:
                return None
            api_segment = path_parts[1]
        else:
            api_segment = path_parts[0]
        # Convert to supplementary file naming convention
        # agents -> agents-api, responses -> responses-api, etc.
        return f"{api_segment}-api"
    def _load_supplemental_content(self, api_group: str | None) -> str:
        """
        Load supplemental content for an API group based on stability level.
        Follows this resolution order:
        1. docs/supplementary/{stability}/{api_group}.md
        2. docs/supplementary/shared/{api_group}.md (fallback)
        3. Empty string if no files found
        Args:
            api_group: The API group name (e.g., "agents-responses-api"), or None if no mapping exists
        Returns:
            The supplemental content as markdown string, or empty string if not found
        """
        if not api_group:
            return ""
        base_path = Path(__file__).parent.parent.parent / "supplementary"
        # Try stability-specific content first if stability filter is set
        if self.options.stability_filter:
            stability_path = base_path / self.options.stability_filter / f"{api_group}.md"
            if stability_path.exists():
                try:
                    return stability_path.read_text(encoding="utf-8")
                except Exception as e:
                    print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}")
        # Fall back to shared content
        shared_path = base_path / "shared" / f"{api_group}.md"
        if shared_path.exists():
            try:
                return shared_path.read_text(encoding="utf-8")
            except Exception as e:
                print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}")
        # No supplemental content found
        return ""
    def _build_operation(self, op: EndpointOperation) -> Operation:
        if op.defining_class.__name__ in [
            "SyntheticDataGeneration",
@ -799,10 +877,14 @@ class Generator:
        else:
            callbacks = None
-        description = "\n".join(
+        # Build base description from docstring
        base_description = "\n".join(
            filter(None, [doc_string.short_description, doc_string.long_description])
        )
        # Individual endpoints get clean descriptions only
        description = base_description
        return Operation(
            tags=[
                getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)
@ -959,10 +1041,21 @@ class Generator:
            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
                continue
            # Add supplemental content to tag pages
            api_group = f"{cls.__name__.lower()}-api"
            supplemental_content = self._load_supplemental_content(api_group)
            tag_description = doc_string.long_description or ""
            if supplemental_content:
                if tag_description:
                    tag_description = f"{tag_description}\n\n{supplemental_content}"
                else:
                    tag_description = supplemental_content
            operation_tags.append(
                Tag(
                    name=cls.__name__,
-                    description=doc_string.long_description,
+                    description=tag_description,
                    displayName=doc_string.short_description,
                )
            )
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@ -6282,27 +6282,33 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
            "x-displayName": "Agents"
        },
        {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
            "description": ""
        },
        {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
            "description": ""
        },
        {
-            "name": "Datasets"
+            "name": "Datasets",
            "description": ""
        },
        {
            "name": "Eval",
            "description": "",
            "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
        },
        {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -4613,17 +4613,40 @@ security:
  - Default: []
 tags:
  - name: Agents
-    description: >-
+    description: >
      APIs for creating and interacting with agentic systems.
      ## Deprecated APIs
      > **⚠️ DEPRECATED**: These APIs are provided for migration reference and will
      be removed in future versions. Not recommended for new projects.
      ### Migration Guidance
      If you are using deprecated versions of the Agents or Responses APIs, please
      migrate to:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
  - name: Benchmarks
    description: ''
  - name: DatasetIO
    description: ''
  - name: Datasets
    description: ''
  - name: Eval
    description: ''
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: PostTraining (Coming Soon)
    description: ''
  - name: Telemetry
    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/static/experimental-llama-stack-spec.html
+++ b/docs/static/experimental-llama-stack-spec.html
@ -6479,27 +6479,33 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Agents API (Experimental)\n\n> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.\n\nMain functionalities provided by this API:\n\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.\n\n### 🧪 Feedback Welcome\n\nThis API is actively being developed. We welcome feedback on:\n- API design and usability\n- Performance characteristics\n- Missing features or capabilities\n- Integration patterns\n\n**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)",
            "x-displayName": "Agents"
        },
        {
-            "name": "Benchmarks"
+            "name": "Benchmarks",
            "description": ""
        },
        {
-            "name": "DatasetIO"
+            "name": "DatasetIO",
            "description": ""
        },
        {
-            "name": "Datasets"
+            "name": "Datasets",
            "description": ""
        },
        {
            "name": "Eval",
            "description": "",
            "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
        },
        {
-            "name": "PostTraining (Coming Soon)"
+            "name": "PostTraining (Coming Soon)",
            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -4777,15 +4777,64 @@ tags:
  - name: Agents
    description: >-
      APIs for creating and interacting with agentic systems.
      ## Agents API (Experimental)
      > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback.
      Great for exploring new capabilities and providing feedback to influence the
      final design.
      Main functionalities provided by this API:
      - Create agents with specific instructions and ability to use tools.
      - Interactions with agents are grouped into sessions ("threads"), and each interaction
      is called a "turn".
      - Agents can be provided with various tools (see the ToolGroups and ToolRuntime
      APIs for more details).
      - Agents can be provided with various shields (see the Safety API for more details).
      - Agents can also use Memory to retrieve information from knowledge bases. See
      the RAG Tool and Vector IO APIs for more details.
      ### 🧪 Feedback Welcome
      This API is actively being developed. We welcome feedback on:
      - API design and usability
      - Performance characteristics
      - Missing features or capabilities
      - Integration patterns
      **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions)
      or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
    x-displayName: Agents
  - name: Benchmarks
    description: ''
  - name: DatasetIO
    description: ''
  - name: Datasets
    description: ''
  - name: Eval
    description: ''
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: PostTraining (Coming Soon)
    description: ''
  - name: Telemetry
    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -12372,11 +12372,12 @@
    "tags": [
        {
            "name": "Agents",
-            "description": "APIs for creating and interacting with agentic systems.",
+            "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n  - Supports dynamic `vector_store_ids` per call\n  - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
            "x-displayName": "Agents"
        },
        {
-            "name": "Files"
+            "name": "Files",
            "description": ""
        },
        {
            "name": "Inference",
@ -12384,48 +12385,62 @@
            "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
        },
        {
-            "name": "Inspect"
+            "name": "Inspect",
            "description": ""
        },
        {
-            "name": "Models"
+            "name": "Models",
            "description": ""
        },
        {
            "name": "Prompts",
            "description": "",
            "x-displayName": "Protocol for prompt management operations."
        },
        {
            "name": "Providers",
            "description": "",
            "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
        },
        {
-            "name": "Safety"
+            "name": "Safety",
            "description": ""
        },
        {
-            "name": "Scoring"
+            "name": "Scoring",
            "description": ""
        },
        {
-            "name": "ScoringFunctions"
+            "name": "ScoringFunctions",
            "description": ""
        },
        {
-            "name": "Shields"
+            "name": "Shields",
            "description": ""
        },
        {
-            "name": "SyntheticDataGeneration (Coming Soon)"
+            "name": "SyntheticDataGeneration (Coming Soon)",
            "description": ""
        },
        {
-            "name": "Telemetry"
+            "name": "Telemetry",
            "description": ""
        },
        {
-            "name": "ToolGroups"
+            "name": "ToolGroups",
            "description": ""
        },
        {
-            "name": "ToolRuntime"
+            "name": "ToolRuntime",
            "description": ""
        },
        {
-            "name": "VectorDBs"
+            "name": "VectorDBs",
            "description": ""
        },
        {
-            "name": "VectorIO"
+            "name": "VectorIO",
            "description": ""
        }
    ],
    "x-tagGroups": [
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -9197,8 +9197,84 @@ tags:
  - name: Agents
    description: >-
      APIs for creating and interacting with agentic systems.
      ## Responses API
      The Responses API provides OpenAI-compatible functionality with enhanced capabilities
      for dynamic, stateful interactions.
      > **✅ STABLE**: This API is production-ready with backward compatibility guarantees.
      Recommended for production applications.
      ### ✅ Supported Tools
      The Responses API supports the following tool types:
      - **`web_search`**: Search the web for current information and real-time data
      - **`file_search`**: Search through uploaded files and vector stores
        - Supports dynamic `vector_store_ids` per call
        - Compatible with OpenAI file search patterns
      - **`function`**: Call custom functions with JSON schema validation
      - **`mcp_tool`**: Model Context Protocol integration
      ### ✅ Supported Fields & Features
      **Core Capabilities:**
      - **Dynamic Configuration**: Switch models, vector stores, and tools per request
      without pre-configuration
      - **Conversation Branching**: Use `previous_response_id` to branch conversations
      and explore different paths
      - **Rich Annotations**: Automatic file citations, URL citations, and container
      file citations
      - **Status Tracking**: Monitor tool call execution status and handle failures
      gracefully
      ### 🚧 Work in Progress
      - Full real-time response streaming support
      - `tool_choice` parameter
      - `max_tool_calls` parameter
      - Built-in tools (code interpreter, containers API)
      - Safety & guardrails
      - `reasoning` capabilities
      - `service_tier`
      - `logprobs`
      - `max_output_tokens`
      - `metadata` handling
      - `instructions`
      - `incomplete_details`
      - `background`
    x-displayName: Agents
  - name: Files
    description: ''
  - name: Inference
    description: >-
      This API provides the raw interface to the underlying models. Two kinds of models
@ -9212,23 +9288,37 @@ tags:
      Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
  - name: Inspect
    description: ''
  - name: Models
    description: ''
  - name: Prompts
    description: ''
    x-displayName: >-
      Protocol for prompt management operations.
  - name: Providers
    description: ''
    x-displayName: >-
      Providers API for inspecting, listing, and modifying providers and their configurations.
  - name: Safety
    description: ''
  - name: Scoring
    description: ''
  - name: ScoringFunctions
    description: ''
  - name: Shields
    description: ''
  - name: SyntheticDataGeneration (Coming Soon)
    description: ''
  - name: Telemetry
    description: ''
  - name: ToolGroups
    description: ''
  - name: ToolRuntime
    description: ''
  - name: VectorDBs
    description: ''
  - name: VectorIO
    description: ''
 x-tagGroups:
  - name: Operations
    tags:
--- a/docs/supplementary/deprecated/agents-api.md
+++ b/docs/supplementary/deprecated/agents-api.md
@ -0,0 +1,9 @@
 ## Deprecated APIs
 > **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.
 ### Migration Guidance
 If you are using deprecated versions of the Agents or Responses APIs, please migrate to:
 - **Responses API**: Use the stable v1 Responses API endpoints
--- a/docs/supplementary/experimental/agents-api.md
+++ b/docs/supplementary/experimental/agents-api.md
@ -0,0 +1,21 @@
 ## Agents API (Experimental)
 > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.
 Main functionalities provided by this API:
 - Create agents with specific instructions and ability to use tools.
 - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
 - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
 - Agents can be provided with various shields (see the Safety API for more details).
 - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
 ### 🧪 Feedback Welcome
 This API is actively being developed. We welcome feedback on:
 - API design and usability
 - Performance characteristics
 - Missing features or capabilities
 - Integration patterns
 **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
--- a/docs/supplementary/stable/agents-api.md
+++ b/docs/supplementary/stable/agents-api.md
@ -0,0 +1,40 @@
 ## Responses API
 The Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.
 > **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.
 ### ✅ Supported Tools
 The Responses API supports the following tool types:
 - **`web_search`**: Search the web for current information and real-time data
 - **`file_search`**: Search through uploaded files and vector stores
  - Supports dynamic `vector_store_ids` per call
  - Compatible with OpenAI file search patterns
 - **`function`**: Call custom functions with JSON schema validation
 - **`mcp_tool`**: Model Context Protocol integration
 ### ✅ Supported Fields & Features
 **Core Capabilities:**
 - **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration
 - **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths
 - **Rich Annotations**: Automatic file citations, URL citations, and container file citations
 - **Status Tracking**: Monitor tool call execution status and handle failures gracefully
 ### 🚧 Work in Progress
 - Full real-time response streaming support
 - `tool_choice` parameter
 - `max_tool_calls` parameter
 - Built-in tools (code interpreter, containers API)
 - Safety & guardrails
 - `reasoning` capabilities
 - `service_tier`
 - `logprobs`
 - `max_output_tokens`
 - `metadata` handling
 - `instructions`
 - `incomplete_details`
 - `background`