diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 390f0c627..202d3732b 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -677,12 +677,6 @@ class Generator:
                 )
             )
 
-        # types that are produced/consumed by operations
-        type_tags = [
-            self._build_type_tag(ref, schema)
-            for ref, schema in self.schema_builder.schemas.items()
-        ]
-
         # types that are emitted by events
         event_tags: List[Tag] = []
         events = get_endpoint_events(self.endpoint)
@@ -709,7 +703,6 @@ class Generator:
         # list all operations and types
         tags: List[Tag] = []
         tags.extend(operation_tags)
-        tags.extend(type_tags)
         tags.extend(event_tags)
         for extra_tag_group in extra_tag_groups.values():
             tags.extend(extra_tag_group)
@@ -724,13 +717,6 @@ class Generator:
                     tags=sorted(tag.name for tag in operation_tags),
                 )
             )
-        if type_tags:
-            tag_groups.append(
-                TagGroup(
-                    name=self.options.map("Types"),
-                    tags=sorted(tag.name for tag in type_tags),
-                )
-            )
         if event_tags:
             tag_groups.append(
                 TagGroup(
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 15c9c9484..0454e22ec 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -2989,8 +2989,7 @@
                             "function_tag",
                             "python_list"
                         ],
-                        "title": "This Enum refers to the prompt format for calling custom / zero shot tools",
-                        "description": "The detailed prompts for each of these formats are added to llama cli"
+                        "title": "Prompt format for calling custom / zero shot tools."
                     },
                     "response_format": {
                         "$ref": "#/components/schemas/ResponseFormat"
@@ -3494,8 +3493,7 @@
                             "function_tag",
                             "python_list"
                         ],
-                        "title": "This Enum refers to the prompt format for calling custom / zero shot tools",
-                        "description": "The detailed prompts for each of these formats are added to llama cli"
+                        "title": "Prompt format for calling custom / zero shot tools."
                     },
                     "max_infer_iters": {
                         "type": "integer",
@@ -7842,804 +7840,68 @@
     ],
     "tags": [
         {
-            "name": "AgentCandidate",
-            "description": ""
-        },
-        {
-            "name": "AgentConfig",
-            "description": ""
-        },
-        {
-            "name": "AgentCreateResponse",
-            "description": ""
-        },
-        {
-            "name": "AgentSessionCreateResponse",
-            "description": ""
-        },
-        {
-            "name": "AgentStepResponse",
-            "description": ""
-        },
-        {
-            "name": "AgentTool",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnInputType",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseEvent",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseEventPayload",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseStepCompletePayload",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseStepProgressPayload",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseStepStartPayload",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseStreamChunk",
-            "description": "streamed agent turn completion response."
-        },
-        {
-            "name": "AgentTurnResponseTurnCompletePayload",
-            "description": ""
-        },
-        {
-            "name": "AgentTurnResponseTurnStartPayload",
-            "description": ""
-        },
-        {
-            "name": "Agents"
-        },
-        {
-            "name": "AggregationFunctionType",
-            "description": ""
-        },
-        {
-            "name": "AlgorithmConfig",
-            "description": ""
-        },
-        {
-            "name": "AppEvalTaskConfig",
-            "description": ""
-        },
-        {
-            "name": "AppendRowsRequest",
-            "description": ""
-        },
-        {
-            "name": "ArrayType",
-            "description": ""
-        },
-        {
-            "name": "BasicScoringFnParams",
-            "description": ""
-        },
-        {
-            "name": "BatchChatCompletionRequest",
-            "description": ""
-        },
-        {
-            "name": "BatchChatCompletionResponse",
-            "description": ""
-        },
-        {
-            "name": "BatchCompletionRequest",
-            "description": ""
-        },
-        {
-            "name": "BatchCompletionResponse",
-            "description": ""
+            "name": "Agents",
+            "description": "Main functionalities provided by this API:\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.",
+            "x-displayName": "Agents API for creating and interacting with agentic systems."
         },
         {
             "name": "BatchInference (Coming Soon)"
         },
-        {
-            "name": "BenchmarkEvalTaskConfig",
-            "description": ""
-        },
-        {
-            "name": "BooleanType",
-            "description": ""
-        },
-        {
-            "name": "CancelTrainingJobRequest",
-            "description": ""
-        },
-        {
-            "name": "ChatCompletionInputType",
-            "description": ""
-        },
-        {
-            "name": "ChatCompletionRequest",
-            "description": ""
-        },
-        {
-            "name": "ChatCompletionResponse",
-            "description": "Response from a chat completion request."
-        },
-        {
-            "name": "ChatCompletionResponseEvent",
-            "description": "An event during chat completion generation."
-        },
-        {
-            "name": "ChatCompletionResponseStreamChunk",
-            "description": "A chunk of a streamed chat completion response."
-        },
-        {
-            "name": "Checkpoint",
-            "description": "Checkpoint created during training runs"
-        },
-        {
-            "name": "CompletionInputType",
-            "description": ""
-        },
-        {
-            "name": "CompletionMessage",
-            "description": "A message containing the model's (assistant) response in a chat conversation."
-        },
-        {
-            "name": "CompletionRequest",
-            "description": ""
-        },
-        {
-            "name": "CompletionResponse",
-            "description": "Response from a completion request."
-        },
-        {
-            "name": "CompletionResponseStreamChunk",
-            "description": "A chunk of a streamed completion response."
-        },
-        {
-            "name": "ContentDelta",
-            "description": ""
-        },
-        {
-            "name": "CreateAgentRequest",
-            "description": ""
-        },
-        {
-            "name": "CreateAgentSessionRequest",
-            "description": ""
-        },
-        {
-            "name": "CreateAgentTurnRequest",
-            "description": ""
-        },
-        {
-            "name": "DPOAlignmentConfig",
-            "description": ""
-        },
-        {
-            "name": "DataConfig",
-            "description": ""
-        },
-        {
-            "name": "Dataset",
-            "description": ""
-        },
-        {
-            "name": "DatasetFormat",
-            "description": ""
-        },
         {
             "name": "DatasetIO"
         },
         {
             "name": "Datasets"
         },
-        {
-            "name": "DefaultRAGQueryGeneratorConfig",
-            "description": ""
-        },
-        {
-            "name": "EfficiencyConfig",
-            "description": ""
-        },
-        {
-            "name": "EmbeddingsRequest",
-            "description": ""
-        },
-        {
-            "name": "EmbeddingsResponse",
-            "description": "Response containing generated embeddings."
-        },
         {
             "name": "Eval"
         },
-        {
-            "name": "EvalCandidate",
-            "description": ""
-        },
-        {
-            "name": "EvalTask",
-            "description": ""
-        },
-        {
-            "name": "EvalTaskConfig",
-            "description": ""
-        },
         {
             "name": "EvalTasks"
         },
         {
-            "name": "EvaluateResponse",
-            "description": ""
-        },
-        {
-            "name": "EvaluateRowsRequest",
-            "description": ""
-        },
-        {
-            "name": "Event",
-            "description": ""
-        },
-        {
-            "name": "GrammarResponseFormat",
-            "description": "Configuration for grammar-guided response generation."
-        },
-        {
-            "name": "GreedySamplingStrategy",
-            "description": ""
-        },
-        {
-            "name": "HealthInfo",
-            "description": ""
-        },
-        {
-            "name": "ImageContentItem",
-            "description": ""
-        },
-        {
-            "name": "ImageDelta",
-            "description": ""
-        },
-        {
-            "name": "Inference"
-        },
-        {
-            "name": "InferenceStep",
-            "description": ""
-        },
-        {
-            "name": "InsertChunksRequest",
-            "description": ""
-        },
-        {
-            "name": "InsertRequest",
-            "description": ""
+            "name": "Inference",
+            "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
         },
         {
             "name": "Inspect"
         },
-        {
-            "name": "InterleavedContent",
-            "description": ""
-        },
-        {
-            "name": "InterleavedContentItem",
-            "description": ""
-        },
-        {
-            "name": "InvokeToolRequest",
-            "description": ""
-        },
-        {
-            "name": "Job",
-            "description": ""
-        },
-        {
-            "name": "JobStatus",
-            "description": ""
-        },
-        {
-            "name": "JsonSchemaResponseFormat",
-            "description": "Configuration for JSON schema-guided response generation."
-        },
-        {
-            "name": "JsonType",
-            "description": ""
-        },
-        {
-            "name": "LLMAsJudgeScoringFnParams",
-            "description": ""
-        },
-        {
-            "name": "LLMRAGQueryGeneratorConfig",
-            "description": ""
-        },
-        {
-            "name": "ListDatasetsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListEvalTasksResponse",
-            "description": ""
-        },
-        {
-            "name": "ListModelsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListPostTrainingJobsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListProvidersResponse",
-            "description": ""
-        },
-        {
-            "name": "ListRoutesResponse",
-            "description": ""
-        },
-        {
-            "name": "ListScoringFunctionsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListShieldsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListToolGroupsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListToolsResponse",
-            "description": ""
-        },
-        {
-            "name": "ListVectorDBsResponse",
-            "description": ""
-        },
-        {
-            "name": "LogEventRequest",
-            "description": ""
-        },
-        {
-            "name": "LogSeverity",
-            "description": ""
-        },
-        {
-            "name": "LoraFinetuningConfig",
-            "description": ""
-        },
-        {
-            "name": "MemoryRetrievalStep",
-            "description": ""
-        },
-        {
-            "name": "Message",
-            "description": ""
-        },
-        {
-            "name": "MetricEvent",
-            "description": ""
-        },
-        {
-            "name": "Model",
-            "description": ""
-        },
-        {
-            "name": "ModelCandidate",
-            "description": ""
-        },
-        {
-            "name": "ModelType",
-            "description": ""
-        },
         {
             "name": "Models"
         },
-        {
-            "name": "NumberType",
-            "description": ""
-        },
-        {
-            "name": "ObjectType",
-            "description": ""
-        },
-        {
-            "name": "OptimizerConfig",
-            "description": ""
-        },
-        {
-            "name": "OptimizerType",
-            "description": ""
-        },
-        {
-            "name": "PaginatedRowsResult",
-            "description": ""
-        },
-        {
-            "name": "ParamType",
-            "description": ""
-        },
         {
             "name": "PostTraining (Coming Soon)"
         },
-        {
-            "name": "PostTrainingJob",
-            "description": ""
-        },
-        {
-            "name": "PostTrainingJobArtifactsResponse",
-            "description": "Artifacts of a finetuning job."
-        },
-        {
-            "name": "PostTrainingJobStatusResponse",
-            "description": "Status of a finetuning job."
-        },
-        {
-            "name": "PreferenceOptimizeRequest",
-            "description": ""
-        },
-        {
-            "name": "ProviderInfo",
-            "description": ""
-        },
-        {
-            "name": "QATFinetuningConfig",
-            "description": ""
-        },
-        {
-            "name": "QueryChunksRequest",
-            "description": ""
-        },
-        {
-            "name": "QueryChunksResponse",
-            "description": ""
-        },
-        {
-            "name": "QueryCondition",
-            "description": ""
-        },
-        {
-            "name": "QueryConditionOp",
-            "description": ""
-        },
-        {
-            "name": "QueryRequest",
-            "description": ""
-        },
-        {
-            "name": "QuerySpanTreeResponse",
-            "description": ""
-        },
-        {
-            "name": "QuerySpansResponse",
-            "description": ""
-        },
-        {
-            "name": "QueryTracesResponse",
-            "description": ""
-        },
-        {
-            "name": "RAGDocument",
-            "description": ""
-        },
-        {
-            "name": "RAGQueryConfig",
-            "description": ""
-        },
-        {
-            "name": "RAGQueryGeneratorConfig",
-            "description": ""
-        },
-        {
-            "name": "RAGQueryResult",
-            "description": ""
-        },
-        {
-            "name": "RegexParserScoringFnParams",
-            "description": ""
-        },
-        {
-            "name": "RegisterDatasetRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterEvalTaskRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterModelRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterScoringFunctionRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterShieldRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterToolGroupRequest",
-            "description": ""
-        },
-        {
-            "name": "RegisterVectorDbRequest",
-            "description": ""
-        },
-        {
-            "name": "ResponseFormat",
-            "description": ""
-        },
-        {
-            "name": "RouteInfo",
-            "description": ""
-        },
-        {
-            "name": "RunEvalRequest",
-            "description": ""
-        },
-        {
-            "name": "RunShieldRequest",
-            "description": ""
-        },
-        {
-            "name": "RunShieldResponse",
-            "description": ""
-        },
         {
             "name": "Safety"
         },
-        {
-            "name": "SafetyViolation",
-            "description": ""
-        },
-        {
-            "name": "SamplingParams",
-            "description": ""
-        },
-        {
-            "name": "SamplingStrategy",
-            "description": ""
-        },
-        {
-            "name": "SaveSpansToDatasetRequest",
-            "description": ""
-        },
-        {
-            "name": "ScoreBatchRequest",
-            "description": ""
-        },
-        {
-            "name": "ScoreBatchResponse",
-            "description": ""
-        },
-        {
-            "name": "ScoreRequest",
-            "description": ""
-        },
-        {
-            "name": "ScoreResponse",
-            "description": ""
-        },
         {
             "name": "Scoring"
         },
-        {
-            "name": "ScoringFn",
-            "description": ""
-        },
-        {
-            "name": "ScoringFnParams",
-            "description": ""
-        },
         {
             "name": "ScoringFunctions"
         },
-        {
-            "name": "ScoringResult",
-            "description": ""
-        },
-        {
-            "name": "Session",
-            "description": "A single session of an interaction with an Agentic System."
-        },
-        {
-            "name": "Shield",
-            "description": "A safety shield resource that can be used to check content"
-        },
-        {
-            "name": "ShieldCallStep",
-            "description": ""
-        },
         {
             "name": "Shields"
         },
-        {
-            "name": "Span",
-            "description": ""
-        },
-        {
-            "name": "SpanEndPayload",
-            "description": ""
-        },
-        {
-            "name": "SpanStartPayload",
-            "description": ""
-        },
-        {
-            "name": "SpanStatus",
-            "description": ""
-        },
-        {
-            "name": "SpanWithStatus",
-            "description": ""
-        },
-        {
-            "name": "StringType",
-            "description": ""
-        },
-        {
-            "name": "StructuredLogEvent",
-            "description": ""
-        },
-        {
-            "name": "StructuredLogPayload",
-            "description": ""
-        },
-        {
-            "name": "SupervisedFineTuneRequest",
-            "description": ""
-        },
-        {
-            "name": "SyntheticDataGenerateRequest",
-            "description": ""
-        },
         {
             "name": "SyntheticDataGeneration (Coming Soon)"
         },
-        {
-            "name": "SyntheticDataGenerationResponse",
-            "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
-        },
-        {
-            "name": "SystemMessage",
-            "description": "A system message providing instructions or context to the model."
-        },
         {
             "name": "Telemetry"
         },
-        {
-            "name": "TextContentItem",
-            "description": ""
-        },
-        {
-            "name": "TextDelta",
-            "description": ""
-        },
-        {
-            "name": "TokenLogProbs",
-            "description": "Log probabilities for generated tokens."
-        },
-        {
-            "name": "Tool",
-            "description": ""
-        },
-        {
-            "name": "ToolCall",
-            "description": ""
-        },
-        {
-            "name": "ToolCallDelta",
-            "description": ""
-        },
-        {
-            "name": "ToolDef",
-            "description": ""
-        },
-        {
-            "name": "ToolDefinition",
-            "description": ""
-        },
-        {
-            "name": "ToolExecutionStep",
-            "description": ""
-        },
-        {
-            "name": "ToolGroup",
-            "description": ""
-        },
         {
             "name": "ToolGroups"
         },
-        {
-            "name": "ToolHost",
-            "description": ""
-        },
-        {
-            "name": "ToolInvocationResult",
-            "description": ""
-        },
-        {
-            "name": "ToolParamDefinition",
-            "description": ""
-        },
-        {
-            "name": "ToolParameter",
-            "description": ""
-        },
-        {
-            "name": "ToolResponse",
-            "description": ""
-        },
-        {
-            "name": "ToolResponseMessage",
-            "description": "A message representing the result of a tool invocation."
-        },
         {
             "name": "ToolRuntime"
         },
-        {
-            "name": "TopKSamplingStrategy",
-            "description": ""
-        },
-        {
-            "name": "TopPSamplingStrategy",
-            "description": ""
-        },
-        {
-            "name": "Trace",
-            "description": ""
-        },
-        {
-            "name": "TrainingConfig",
-            "description": ""
-        },
-        {
-            "name": "Turn",
-            "description": "A single turn in an interaction with an Agentic System."
-        },
-        {
-            "name": "URL",
-            "description": ""
-        },
-        {
-            "name": "UnionType",
-            "description": ""
-        },
-        {
-            "name": "UnstructuredLogEvent",
-            "description": ""
-        },
-        {
-            "name": "UserMessage",
-            "description": "A message from the user in a chat conversation."
-        },
-        {
-            "name": "VectorDB",
-            "description": ""
-        },
         {
             "name": "VectorDBs"
         },
         {
             "name": "VectorIO"
-        },
-        {
-            "name": "VersionInfo",
-            "description": ""
-        },
-        {
-            "name": "ViolationLevel",
-            "description": ""
         }
     ],
     "x-tagGroups": [
@@ -8667,196 +7929,6 @@
                 "VectorDBs",
                 "VectorIO"
             ]
-        },
-        {
-            "name": "Types",
-            "tags": [
-                "AgentCandidate",
-                "AgentConfig",
-                "AgentCreateResponse",
-                "AgentSessionCreateResponse",
-                "AgentStepResponse",
-                "AgentTool",
-                "AgentTurnInputType",
-                "AgentTurnResponseEvent",
-                "AgentTurnResponseEventPayload",
-                "AgentTurnResponseStepCompletePayload",
-                "AgentTurnResponseStepProgressPayload",
-                "AgentTurnResponseStepStartPayload",
-                "AgentTurnResponseStreamChunk",
-                "AgentTurnResponseTurnCompletePayload",
-                "AgentTurnResponseTurnStartPayload",
-                "AggregationFunctionType",
-                "AlgorithmConfig",
-                "AppEvalTaskConfig",
-                "AppendRowsRequest",
-                "ArrayType",
-                "BasicScoringFnParams",
-                "BatchChatCompletionRequest",
-                "BatchChatCompletionResponse",
-                "BatchCompletionRequest",
-                "BatchCompletionResponse",
-                "BenchmarkEvalTaskConfig",
-                "BooleanType",
-                "CancelTrainingJobRequest",
-                "ChatCompletionInputType",
-                "ChatCompletionRequest",
-                "ChatCompletionResponse",
-                "ChatCompletionResponseEvent",
-                "ChatCompletionResponseStreamChunk",
-                "Checkpoint",
-                "CompletionInputType",
-                "CompletionMessage",
-                "CompletionRequest",
-                "CompletionResponse",
-                "CompletionResponseStreamChunk",
-                "ContentDelta",
-                "CreateAgentRequest",
-                "CreateAgentSessionRequest",
-                "CreateAgentTurnRequest",
-                "DPOAlignmentConfig",
-                "DataConfig",
-                "Dataset",
-                "DatasetFormat",
-                "DefaultRAGQueryGeneratorConfig",
-                "EfficiencyConfig",
-                "EmbeddingsRequest",
-                "EmbeddingsResponse",
-                "EvalCandidate",
-                "EvalTask",
-                "EvalTaskConfig",
-                "EvaluateResponse",
-                "EvaluateRowsRequest",
-                "Event",
-                "GrammarResponseFormat",
-                "GreedySamplingStrategy",
-                "HealthInfo",
-                "ImageContentItem",
-                "ImageDelta",
-                "InferenceStep",
-                "InsertChunksRequest",
-                "InsertRequest",
-                "InterleavedContent",
-                "InterleavedContentItem",
-                "InvokeToolRequest",
-                "Job",
-                "JobStatus",
-                "JsonSchemaResponseFormat",
-                "JsonType",
-                "LLMAsJudgeScoringFnParams",
-                "LLMRAGQueryGeneratorConfig",
-                "ListDatasetsResponse",
-                "ListEvalTasksResponse",
-                "ListModelsResponse",
-                "ListPostTrainingJobsResponse",
-                "ListProvidersResponse",
-                "ListRoutesResponse",
-                "ListScoringFunctionsResponse",
-                "ListShieldsResponse",
-                "ListToolGroupsResponse",
-                "ListToolsResponse",
-                "ListVectorDBsResponse",
-                "LogEventRequest",
-                "LogSeverity",
-                "LoraFinetuningConfig",
-                "MemoryRetrievalStep",
-                "Message",
-                "MetricEvent",
-                "Model",
-                "ModelCandidate",
-                "ModelType",
-                "NumberType",
-                "ObjectType",
-                "OptimizerConfig",
-                "OptimizerType",
-                "PaginatedRowsResult",
-                "ParamType",
-                "PostTrainingJob",
-                "PostTrainingJobArtifactsResponse",
-                "PostTrainingJobStatusResponse",
-                "PreferenceOptimizeRequest",
-                "ProviderInfo",
-                "QATFinetuningConfig",
-                "QueryChunksRequest",
-                "QueryChunksResponse",
-                "QueryCondition",
-                "QueryConditionOp",
-                "QueryRequest",
-                "QuerySpanTreeResponse",
-                "QuerySpansResponse",
-                "QueryTracesResponse",
-                "RAGDocument",
-                "RAGQueryConfig",
-                "RAGQueryGeneratorConfig",
-                "RAGQueryResult",
-                "RegexParserScoringFnParams",
-                "RegisterDatasetRequest",
-                "RegisterEvalTaskRequest",
-                "RegisterModelRequest",
-                "RegisterScoringFunctionRequest",
-                "RegisterShieldRequest",
-                "RegisterToolGroupRequest",
-                "RegisterVectorDbRequest",
-                "ResponseFormat",
-                "RouteInfo",
-                "RunEvalRequest",
-                "RunShieldRequest",
-                "RunShieldResponse",
-                "SafetyViolation",
-                "SamplingParams",
-                "SamplingStrategy",
-                "SaveSpansToDatasetRequest",
-                "ScoreBatchRequest",
-                "ScoreBatchResponse",
-                "ScoreRequest",
-                "ScoreResponse",
-                "ScoringFn",
-                "ScoringFnParams",
-                "ScoringResult",
-                "Session",
-                "Shield",
-                "ShieldCallStep",
-                "Span",
-                "SpanEndPayload",
-                "SpanStartPayload",
-                "SpanStatus",
-                "SpanWithStatus",
-                "StringType",
-                "StructuredLogEvent",
-                "StructuredLogPayload",
-                "SupervisedFineTuneRequest",
-                "SyntheticDataGenerateRequest",
-                "SyntheticDataGenerationResponse",
-                "SystemMessage",
-                "TextContentItem",
-                "TextDelta",
-                "TokenLogProbs",
-                "Tool",
-                "ToolCall",
-                "ToolCallDelta",
-                "ToolDef",
-                "ToolDefinition",
-                "ToolExecutionStep",
-                "ToolGroup",
-                "ToolHost",
-                "ToolInvocationResult",
-                "ToolParamDefinition",
-                "ToolParameter",
-                "ToolResponse",
-                "ToolResponseMessage",
-                "TopKSamplingStrategy",
-                "TopPSamplingStrategy",
-                "Trace",
-                "TrainingConfig",
-                "Turn",
-                "URL",
-                "UnionType",
-                "UnstructuredLogEvent",
-                "UserMessage",
-                "VectorDB",
-                "VersionInfo",
-                "ViolationLevel"
-            ]
         }
     ]
 };
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index a83b3afe5..0734ef236 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1817,9 +1817,7 @@ components:
             - function_tag
             - python_list
           title: >-
-            This Enum refers to the prompt format for calling custom / zero shot tools
-          description: >-
-            The detailed prompts for each of these formats are added to llama cli
+            Prompt format for calling custom / zero shot tools.
         response_format:
           $ref: '#/components/schemas/ResponseFormat'
         logprobs:
@@ -2225,9 +2223,7 @@ components:
             - function_tag
             - python_list
           title: >-
-            This Enum refers to the prompt format for calling custom / zero shot tools
-          description: >-
-            The detailed prompts for each of these formats are added to llama cli
+            Prompt format for calling custom / zero shot tools.
         max_infer_iters:
           type: integer
           default: 10
@@ -4905,411 +4901,54 @@ components:
 security:
   - Default: []
 tags:
-  - name: AgentCandidate
-    description: ''
-  - name: AgentConfig
-    description: ''
-  - name: AgentCreateResponse
-    description: ''
-  - name: AgentSessionCreateResponse
-    description: ''
-  - name: AgentStepResponse
-    description: ''
-  - name: AgentTool
-    description: ''
-  - name: AgentTurnInputType
-    description: ''
-  - name: AgentTurnResponseEvent
-    description: ''
-  - name: AgentTurnResponseEventPayload
-    description: ''
-  - name: AgentTurnResponseStepCompletePayload
-    description: ''
-  - name: AgentTurnResponseStepProgressPayload
-    description: ''
-  - name: AgentTurnResponseStepStartPayload
-    description: ''
-  - name: AgentTurnResponseStreamChunk
-    description: streamed agent turn completion response.
-  - name: AgentTurnResponseTurnCompletePayload
-    description: ''
-  - name: AgentTurnResponseTurnStartPayload
-    description: ''
   - name: Agents
-  - name: AggregationFunctionType
-    description: ''
-  - name: AlgorithmConfig
-    description: ''
-  - name: AppEvalTaskConfig
-    description: ''
-  - name: AppendRowsRequest
-    description: ''
-  - name: ArrayType
-    description: ''
-  - name: BasicScoringFnParams
-    description: ''
-  - name: BatchChatCompletionRequest
-    description: ''
-  - name: BatchChatCompletionResponse
-    description: ''
-  - name: BatchCompletionRequest
-    description: ''
-  - name: BatchCompletionResponse
-    description: ''
+    description: >-
+      Main functionalities provided by this API:
+
+      - Create agents with specific instructions and ability to use tools.
+
+      - Interactions with agents are grouped into sessions ("threads"), and each interaction
+      is called a "turn".
+
+      - Agents can be provided with various tools (see the ToolGroups and ToolRuntime
+      APIs for more details).
+
+      - Agents can be provided with various shields (see the Safety API for more details).
+
+      - Agents can also use Memory to retrieve information from knowledge bases. See
+      the RAG Tool and Vector IO APIs for more details.
+    x-displayName: >-
+      Agents API for creating and interacting with agentic systems.
   - name: BatchInference (Coming Soon)
-  - name: BenchmarkEvalTaskConfig
-    description: ''
-  - name: BooleanType
-    description: ''
-  - name: CancelTrainingJobRequest
-    description: ''
-  - name: ChatCompletionInputType
-    description: ''
-  - name: ChatCompletionRequest
-    description: ''
-  - name: ChatCompletionResponse
-    description: Response from a chat completion request.
-  - name: ChatCompletionResponseEvent
-    description: >-
-      An event during chat completion generation.
-  - name: ChatCompletionResponseStreamChunk
-    description: >-
-      A chunk of a streamed chat completion response.
-  - name: Checkpoint
-    description: Checkpoint created during training runs
-  - name: CompletionInputType
-    description: ''
-  - name: CompletionMessage
-    description: >-
-      A message containing the model's (assistant) response in a chat conversation.
-  - name: CompletionRequest
-    description: ''
-  - name: CompletionResponse
-    description: Response from a completion request.
-  - name: CompletionResponseStreamChunk
-    description: >-
-      A chunk of a streamed completion response.
-  - name: ContentDelta
-    description: ''
-  - name: CreateAgentRequest
-    description: ''
-  - name: CreateAgentSessionRequest
-    description: ''
-  - name: CreateAgentTurnRequest
-    description: ''
-  - name: DPOAlignmentConfig
-    description: ''
-  - name: DataConfig
-    description: ''
-  - name: Dataset
-    description: ''
-  - name: DatasetFormat
-    description: ''
   - name: DatasetIO
   - name: Datasets
-  - name: DefaultRAGQueryGeneratorConfig
-    description: ''
-  - name: EfficiencyConfig
-    description: ''
-  - name: EmbeddingsRequest
-    description: ''
-  - name: EmbeddingsResponse
-    description: >-
-      Response containing generated embeddings.
   - name: Eval
-  - name: EvalCandidate
-    description: ''
-  - name: EvalTask
-    description: ''
-  - name: EvalTaskConfig
-    description: ''
   - name: EvalTasks
-  - name: EvaluateResponse
-    description: ''
-  - name: EvaluateRowsRequest
-    description: ''
-  - name: Event
-    description: ''
-  - name: GrammarResponseFormat
-    description: >-
-      Configuration for grammar-guided response generation.
-  - name: GreedySamplingStrategy
-    description: ''
-  - name: HealthInfo
-    description: ''
-  - name: ImageContentItem
-    description: ''
-  - name: ImageDelta
-    description: ''
   - name: Inference
-  - name: InferenceStep
-    description: ''
-  - name: InsertChunksRequest
-    description: ''
-  - name: InsertRequest
-    description: ''
+    description: >-
+      This API provides the raw interface to the underlying models. Two kinds of models
+      are supported:
+
+      - LLM models: these models generate "raw" and "chat" (conversational) completions.
+
+      - Embedding models: these models generate embeddings to be used for semantic
+      search.
+    x-displayName: >-
+      Llama Stack Inference API for generating completions, chat completions, and
+      embeddings.
   - name: Inspect
-  - name: InterleavedContent
-    description: ''
-  - name: InterleavedContentItem
-    description: ''
-  - name: InvokeToolRequest
-    description: ''
-  - name: Job
-    description: ''
-  - name: JobStatus
-    description: ''
-  - name: JsonSchemaResponseFormat
-    description: >-
-      Configuration for JSON schema-guided response generation.
-  - name: JsonType
-    description: ''
-  - name: LLMAsJudgeScoringFnParams
-    description: ''
-  - name: LLMRAGQueryGeneratorConfig
-    description: ''
-  - name: ListDatasetsResponse
-    description: ''
-  - name: ListEvalTasksResponse
-    description: ''
-  - name: ListModelsResponse
-    description: ''
-  - name: ListPostTrainingJobsResponse
-    description: ''
-  - name: ListProvidersResponse
-    description: ''
-  - name: ListRoutesResponse
-    description: ''
-  - name: ListScoringFunctionsResponse
-    description: ''
-  - name: ListShieldsResponse
-    description: ''
-  - name: ListToolGroupsResponse
-    description: ''
-  - name: ListToolsResponse
-    description: ''
-  - name: ListVectorDBsResponse
-    description: ''
-  - name: LogEventRequest
-    description: ''
-  - name: LogSeverity
-    description: ''
-  - name: LoraFinetuningConfig
-    description: ''
-  - name: MemoryRetrievalStep
-    description: ''
-  - name: Message
-    description: ''
-  - name: MetricEvent
-    description: ''
-  - name: Model
-    description: ''
-  - name: ModelCandidate
-    description: ''
-  - name: ModelType
-    description: ''
   - name: Models
-  - name: NumberType
-    description: ''
-  - name: ObjectType
-    description: ''
-  - name: OptimizerConfig
-    description: ''
-  - name: OptimizerType
-    description: ''
-  - name: PaginatedRowsResult
-    description: ''
-  - name: ParamType
-    description: ''
   - name: PostTraining (Coming Soon)
-  - name: PostTrainingJob
-    description: ''
-  - name: PostTrainingJobArtifactsResponse
-    description: Artifacts of a finetuning job.
-  - name: PostTrainingJobStatusResponse
-    description: Status of a finetuning job.
-  - name: PreferenceOptimizeRequest
-    description: ''
-  - name: ProviderInfo
-    description: ''
-  - name: QATFinetuningConfig
-    description: ''
-  - name: QueryChunksRequest
-    description: ''
-  - name: QueryChunksResponse
-    description: ''
-  - name: QueryCondition
-    description: ''
-  - name: QueryConditionOp
-    description: ''
-  - name: QueryRequest
-    description: ''
-  - name: QuerySpanTreeResponse
-    description: ''
-  - name: QuerySpansResponse
-    description: ''
-  - name: QueryTracesResponse
-    description: ''
-  - name: RAGDocument
-    description: ''
-  - name: RAGQueryConfig
-    description: ''
-  - name: RAGQueryGeneratorConfig
-    description: ''
-  - name: RAGQueryResult
-    description: ''
-  - name: RegexParserScoringFnParams
-    description: ''
-  - name: RegisterDatasetRequest
-    description: ''
-  - name: RegisterEvalTaskRequest
-    description: ''
-  - name: RegisterModelRequest
-    description: ''
-  - name: RegisterScoringFunctionRequest
-    description: ''
-  - name: RegisterShieldRequest
-    description: ''
-  - name: RegisterToolGroupRequest
-    description: ''
-  - name: RegisterVectorDbRequest
-    description: ''
-  - name: ResponseFormat
-    description: ''
-  - name: RouteInfo
-    description: ''
-  - name: RunEvalRequest
-    description: ''
-  - name: RunShieldRequest
-    description: ''
-  - name: RunShieldResponse
-    description: ''
   - name: Safety
-  - name: SafetyViolation
-    description: ''
-  - name: SamplingParams
-    description: ''
-  - name: SamplingStrategy
-    description: ''
-  - name: SaveSpansToDatasetRequest
-    description: ''
-  - name: ScoreBatchRequest
-    description: ''
-  - name: ScoreBatchResponse
-    description: ''
-  - name: ScoreRequest
-    description: ''
-  - name: ScoreResponse
-    description: ''
   - name: Scoring
-  - name: ScoringFn
-    description: ''
-  - name: ScoringFnParams
-    description: ''
   - name: ScoringFunctions
-  - name: ScoringResult
-    description: ''
-  - name: Session
-    description: >-
-      A single session of an interaction with an Agentic System.
-  - name: Shield
-    description: >-
-      A safety shield resource that can be used to check content
-  - name: ShieldCallStep
-    description: ''
   - name: Shields
-  - name: Span
-    description: ''
-  - name: SpanEndPayload
-    description: ''
-  - name: SpanStartPayload
-    description: ''
-  - name: SpanStatus
-    description: ''
-  - name: SpanWithStatus
-    description: ''
-  - name: StringType
-    description: ''
-  - name: StructuredLogEvent
-    description: ''
-  - name: StructuredLogPayload
-    description: ''
-  - name: SupervisedFineTuneRequest
-    description: ''
-  - name: SyntheticDataGenerateRequest
-    description: ''
   - name: SyntheticDataGeneration (Coming Soon)
-  - name: SyntheticDataGenerationResponse
-    description: >-
-      Response from the synthetic data generation. Batch of (prompt, response, score)
-      tuples that pass the threshold.
-  - name: SystemMessage
-    description: >-
-      A system message providing instructions or context to the model.
   - name: Telemetry
-  - name: TextContentItem
-    description: ''
-  - name: TextDelta
-    description: ''
-  - name: TokenLogProbs
-    description: Log probabilities for generated tokens.
-  - name: Tool
-    description: ''
-  - name: ToolCall
-    description: ''
-  - name: ToolCallDelta
-    description: ''
-  - name: ToolDef
-    description: ''
-  - name: ToolDefinition
-    description: ''
-  - name: ToolExecutionStep
-    description: ''
-  - name: ToolGroup
-    description: ''
   - name: ToolGroups
-  - name: ToolHost
-    description: ''
-  - name: ToolInvocationResult
-    description: ''
-  - name: ToolParamDefinition
-    description: ''
-  - name: ToolParameter
-    description: ''
-  - name: ToolResponse
-    description: ''
-  - name: ToolResponseMessage
-    description: >-
-      A message representing the result of a tool invocation.
   - name: ToolRuntime
-  - name: TopKSamplingStrategy
-    description: ''
-  - name: TopPSamplingStrategy
-    description: ''
-  - name: Trace
-    description: ''
-  - name: TrainingConfig
-    description: ''
-  - name: Turn
-    description: >-
-      A single turn in an interaction with an Agentic System.
-  - name: URL
-    description: ''
-  - name: UnionType
-    description: ''
-  - name: UnstructuredLogEvent
-    description: ''
-  - name: UserMessage
-    description: >-
-      A message from the user in a chat conversation.
-  - name: VectorDB
-    description: ''
   - name: VectorDBs
   - name: VectorIO
-  - name: VersionInfo
-    description: ''
-  - name: ViolationLevel
-    description: ''
 x-tagGroups:
   - name: Operations
     tags:
@@ -5333,190 +4972,3 @@ x-tagGroups:
       - ToolRuntime
       - VectorDBs
       - VectorIO
-  - name: Types
-    tags:
-      - AgentCandidate
-      - AgentConfig
-      - AgentCreateResponse
-      - AgentSessionCreateResponse
-      - AgentStepResponse
-      - AgentTool
-      - AgentTurnInputType
-      - AgentTurnResponseEvent
-      - AgentTurnResponseEventPayload
-      - AgentTurnResponseStepCompletePayload
-      - AgentTurnResponseStepProgressPayload
-      - AgentTurnResponseStepStartPayload
-      - AgentTurnResponseStreamChunk
-      - AgentTurnResponseTurnCompletePayload
-      - AgentTurnResponseTurnStartPayload
-      - AggregationFunctionType
-      - AlgorithmConfig
-      - AppEvalTaskConfig
-      - AppendRowsRequest
-      - ArrayType
-      - BasicScoringFnParams
-      - BatchChatCompletionRequest
-      - BatchChatCompletionResponse
-      - BatchCompletionRequest
-      - BatchCompletionResponse
-      - BenchmarkEvalTaskConfig
-      - BooleanType
-      - CancelTrainingJobRequest
-      - ChatCompletionInputType
-      - ChatCompletionRequest
-      - ChatCompletionResponse
-      - ChatCompletionResponseEvent
-      - ChatCompletionResponseStreamChunk
-      - Checkpoint
-      - CompletionInputType
-      - CompletionMessage
-      - CompletionRequest
-      - CompletionResponse
-      - CompletionResponseStreamChunk
-      - ContentDelta
-      - CreateAgentRequest
-      - CreateAgentSessionRequest
-      - CreateAgentTurnRequest
-      - DPOAlignmentConfig
-      - DataConfig
-      - Dataset
-      - DatasetFormat
-      - DefaultRAGQueryGeneratorConfig
-      - EfficiencyConfig
-      - EmbeddingsRequest
-      - EmbeddingsResponse
-      - EvalCandidate
-      - EvalTask
-      - EvalTaskConfig
-      - EvaluateResponse
-      - EvaluateRowsRequest
-      - Event
-      - GrammarResponseFormat
-      - GreedySamplingStrategy
-      - HealthInfo
-      - ImageContentItem
-      - ImageDelta
-      - InferenceStep
-      - InsertChunksRequest
-      - InsertRequest
-      - InterleavedContent
-      - InterleavedContentItem
-      - InvokeToolRequest
-      - Job
-      - JobStatus
-      - JsonSchemaResponseFormat
-      - JsonType
-      - LLMAsJudgeScoringFnParams
-      - LLMRAGQueryGeneratorConfig
-      - ListDatasetsResponse
-      - ListEvalTasksResponse
-      - ListModelsResponse
-      - ListPostTrainingJobsResponse
-      - ListProvidersResponse
-      - ListRoutesResponse
-      - ListScoringFunctionsResponse
-      - ListShieldsResponse
-      - ListToolGroupsResponse
-      - ListToolsResponse
-      - ListVectorDBsResponse
-      - LogEventRequest
-      - LogSeverity
-      - LoraFinetuningConfig
-      - MemoryRetrievalStep
-      - Message
-      - MetricEvent
-      - Model
-      - ModelCandidate
-      - ModelType
-      - NumberType
-      - ObjectType
-      - OptimizerConfig
-      - OptimizerType
-      - PaginatedRowsResult
-      - ParamType
-      - PostTrainingJob
-      - PostTrainingJobArtifactsResponse
-      - PostTrainingJobStatusResponse
-      - PreferenceOptimizeRequest
-      - ProviderInfo
-      - QATFinetuningConfig
-      - QueryChunksRequest
-      - QueryChunksResponse
-      - QueryCondition
-      - QueryConditionOp
-      - QueryRequest
-      - QuerySpanTreeResponse
-      - QuerySpansResponse
-      - QueryTracesResponse
-      - RAGDocument
-      - RAGQueryConfig
-      - RAGQueryGeneratorConfig
-      - RAGQueryResult
-      - RegexParserScoringFnParams
-      - RegisterDatasetRequest
-      - RegisterEvalTaskRequest
-      - RegisterModelRequest
-      - RegisterScoringFunctionRequest
-      - RegisterShieldRequest
-      - RegisterToolGroupRequest
-      - RegisterVectorDbRequest
-      - ResponseFormat
-      - RouteInfo
-      - RunEvalRequest
-      - RunShieldRequest
-      - RunShieldResponse
-      - SafetyViolation
-      - SamplingParams
-      - SamplingStrategy
-      - SaveSpansToDatasetRequest
-      - ScoreBatchRequest
-      - ScoreBatchResponse
-      - ScoreRequest
-      - ScoreResponse
-      - ScoringFn
-      - ScoringFnParams
-      - ScoringResult
-      - Session
-      - Shield
-      - ShieldCallStep
-      - Span
-      - SpanEndPayload
-      - SpanStartPayload
-      - SpanStatus
-      - SpanWithStatus
-      - StringType
-      - StructuredLogEvent
-      - StructuredLogPayload
-      - SupervisedFineTuneRequest
-      - SyntheticDataGenerateRequest
-      - SyntheticDataGenerationResponse
-      - SystemMessage
-      - TextContentItem
-      - TextDelta
-      - TokenLogProbs
-      - Tool
-      - ToolCall
-      - ToolCallDelta
-      - ToolDef
-      - ToolDefinition
-      - ToolExecutionStep
-      - ToolGroup
-      - ToolHost
-      - ToolInvocationResult
-      - ToolParamDefinition
-      - ToolParameter
-      - ToolResponse
-      - ToolResponseMessage
-      - TopKSamplingStrategy
-      - TopPSamplingStrategy
-      - Trace
-      - TrainingConfig
-      - Turn
-      - URL
-      - UnionType
-      - UnstructuredLogEvent
-      - UserMessage
-      - VectorDB
-      - VersionInfo
-      - ViolationLevel
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index d41abc846..68eecaccb 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -297,6 +297,16 @@ class AgentStepResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Agents(Protocol):
+    """Agents API for creating and interacting with agentic systems.
+
+    Main functionalities provided by this API:
+    - Create agents with specific instructions and ability to use tools.
+    - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
+    - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
+    - Agents can be provided with various shields (see the Safety API for more details).
+    - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
+    """
+
     @webmethod(route="/agents", method="POST")
     async def create_agent(
         self,
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index a163943eb..2debce1a7 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -362,6 +362,13 @@ class ModelStore(Protocol):
 @runtime_checkable
 @trace_protocol
 class Inference(Protocol):
+    """Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+    - LLM models: these models generate "raw" and "chat" (conversational) completions.
+    - Embedding models: these models generate embeddings to be used for semantic search.
+    """
+
     model_store: ModelStore
 
     @webmethod(route="/inference/completion", method="POST")