diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 390f0c627..202d3732b 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -677,12 +677,6 @@ class Generator: ) ) - # types that are produced/consumed by operations - type_tags = [ - self._build_type_tag(ref, schema) - for ref, schema in self.schema_builder.schemas.items() - ] - # types that are emitted by events event_tags: List[Tag] = [] events = get_endpoint_events(self.endpoint) @@ -709,7 +703,6 @@ class Generator: # list all operations and types tags: List[Tag] = [] tags.extend(operation_tags) - tags.extend(type_tags) tags.extend(event_tags) for extra_tag_group in extra_tag_groups.values(): tags.extend(extra_tag_group) @@ -724,13 +717,6 @@ class Generator: tags=sorted(tag.name for tag in operation_tags), ) ) - if type_tags: - tag_groups.append( - TagGroup( - name=self.options.map("Types"), - tags=sorted(tag.name for tag in type_tags), - ) - ) if event_tags: tag_groups.append( TagGroup( diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 15c9c9484..0454e22ec 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -2989,8 +2989,7 @@ "function_tag", "python_list" ], - "title": "This Enum refers to the prompt format for calling custom / zero shot tools", - "description": "The detailed prompts for each of these formats are added to llama cli" + "title": "Prompt format for calling custom / zero shot tools." }, "response_format": { "$ref": "#/components/schemas/ResponseFormat" @@ -3494,8 +3493,7 @@ "function_tag", "python_list" ], - "title": "This Enum refers to the prompt format for calling custom / zero shot tools", - "description": "The detailed prompts for each of these formats are added to llama cli" + "title": "Prompt format for calling custom / zero shot tools." }, "max_infer_iters": { "type": "integer", @@ -7842,804 +7840,68 @@ ], "tags": [ { - "name": "AgentCandidate", - "description": "" - }, - { - "name": "AgentConfig", - "description": "" - }, - { - "name": "AgentCreateResponse", - "description": "" - }, - { - "name": "AgentSessionCreateResponse", - "description": "" - }, - { - "name": "AgentStepResponse", - "description": "" - }, - { - "name": "AgentTool", - "description": "" - }, - { - "name": "AgentTurnInputType", - "description": "" - }, - { - "name": "AgentTurnResponseEvent", - "description": "" - }, - { - "name": "AgentTurnResponseEventPayload", - "description": "" - }, - { - "name": "AgentTurnResponseStepCompletePayload", - "description": "" - }, - { - "name": "AgentTurnResponseStepProgressPayload", - "description": "" - }, - { - "name": "AgentTurnResponseStepStartPayload", - "description": "" - }, - { - "name": "AgentTurnResponseStreamChunk", - "description": "streamed agent turn completion response." - }, - { - "name": "AgentTurnResponseTurnCompletePayload", - "description": "" - }, - { - "name": "AgentTurnResponseTurnStartPayload", - "description": "" - }, - { - "name": "Agents" - }, - { - "name": "AggregationFunctionType", - "description": "" - }, - { - "name": "AlgorithmConfig", - "description": "" - }, - { - "name": "AppEvalTaskConfig", - "description": "" - }, - { - "name": "AppendRowsRequest", - "description": "" - }, - { - "name": "ArrayType", - "description": "" - }, - { - "name": "BasicScoringFnParams", - "description": "" - }, - { - "name": "BatchChatCompletionRequest", - "description": "" - }, - { - "name": "BatchChatCompletionResponse", - "description": "" - }, - { - "name": "BatchCompletionRequest", - "description": "" - }, - { - "name": "BatchCompletionResponse", - "description": "" + "name": "Agents", + "description": "Main functionalities provided by this API:\n- Create agents with specific instructions and ability to use tools.\n- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".\n- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).\n- Agents can be provided with various shields (see the Safety API for more details).\n- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.", + "x-displayName": "Agents API for creating and interacting with agentic systems." }, { "name": "BatchInference (Coming Soon)" }, - { - "name": "BenchmarkEvalTaskConfig", - "description": "" - }, - { - "name": "BooleanType", - "description": "" - }, - { - "name": "CancelTrainingJobRequest", - "description": "" - }, - { - "name": "ChatCompletionInputType", - "description": "" - }, - { - "name": "ChatCompletionRequest", - "description": "" - }, - { - "name": "ChatCompletionResponse", - "description": "Response from a chat completion request." - }, - { - "name": "ChatCompletionResponseEvent", - "description": "An event during chat completion generation." - }, - { - "name": "ChatCompletionResponseStreamChunk", - "description": "A chunk of a streamed chat completion response." - }, - { - "name": "Checkpoint", - "description": "Checkpoint created during training runs" - }, - { - "name": "CompletionInputType", - "description": "" - }, - { - "name": "CompletionMessage", - "description": "A message containing the model's (assistant) response in a chat conversation." - }, - { - "name": "CompletionRequest", - "description": "" - }, - { - "name": "CompletionResponse", - "description": "Response from a completion request." - }, - { - "name": "CompletionResponseStreamChunk", - "description": "A chunk of a streamed completion response." - }, - { - "name": "ContentDelta", - "description": "" - }, - { - "name": "CreateAgentRequest", - "description": "" - }, - { - "name": "CreateAgentSessionRequest", - "description": "" - }, - { - "name": "CreateAgentTurnRequest", - "description": "" - }, - { - "name": "DPOAlignmentConfig", - "description": "" - }, - { - "name": "DataConfig", - "description": "" - }, - { - "name": "Dataset", - "description": "" - }, - { - "name": "DatasetFormat", - "description": "" - }, { "name": "DatasetIO" }, { "name": "Datasets" }, - { - "name": "DefaultRAGQueryGeneratorConfig", - "description": "" - }, - { - "name": "EfficiencyConfig", - "description": "" - }, - { - "name": "EmbeddingsRequest", - "description": "" - }, - { - "name": "EmbeddingsResponse", - "description": "Response containing generated embeddings." - }, { "name": "Eval" }, - { - "name": "EvalCandidate", - "description": "" - }, - { - "name": "EvalTask", - "description": "" - }, - { - "name": "EvalTaskConfig", - "description": "" - }, { "name": "EvalTasks" }, { - "name": "EvaluateResponse", - "description": "" - }, - { - "name": "EvaluateRowsRequest", - "description": "" - }, - { - "name": "Event", - "description": "" - }, - { - "name": "GrammarResponseFormat", - "description": "Configuration for grammar-guided response generation." - }, - { - "name": "GreedySamplingStrategy", - "description": "" - }, - { - "name": "HealthInfo", - "description": "" - }, - { - "name": "ImageContentItem", - "description": "" - }, - { - "name": "ImageDelta", - "description": "" - }, - { - "name": "Inference" - }, - { - "name": "InferenceStep", - "description": "" - }, - { - "name": "InsertChunksRequest", - "description": "" - }, - { - "name": "InsertRequest", - "description": "" + "name": "Inference", + "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." }, { "name": "Inspect" }, - { - "name": "InterleavedContent", - "description": "" - }, - { - "name": "InterleavedContentItem", - "description": "" - }, - { - "name": "InvokeToolRequest", - "description": "" - }, - { - "name": "Job", - "description": "" - }, - { - "name": "JobStatus", - "description": "" - }, - { - "name": "JsonSchemaResponseFormat", - "description": "Configuration for JSON schema-guided response generation." - }, - { - "name": "JsonType", - "description": "" - }, - { - "name": "LLMAsJudgeScoringFnParams", - "description": "" - }, - { - "name": "LLMRAGQueryGeneratorConfig", - "description": "" - }, - { - "name": "ListDatasetsResponse", - "description": "" - }, - { - "name": "ListEvalTasksResponse", - "description": "" - }, - { - "name": "ListModelsResponse", - "description": "" - }, - { - "name": "ListPostTrainingJobsResponse", - "description": "" - }, - { - "name": "ListProvidersResponse", - "description": "" - }, - { - "name": "ListRoutesResponse", - "description": "" - }, - { - "name": "ListScoringFunctionsResponse", - "description": "" - }, - { - "name": "ListShieldsResponse", - "description": "" - }, - { - "name": "ListToolGroupsResponse", - "description": "" - }, - { - "name": "ListToolsResponse", - "description": "" - }, - { - "name": "ListVectorDBsResponse", - "description": "" - }, - { - "name": "LogEventRequest", - "description": "" - }, - { - "name": "LogSeverity", - "description": "" - }, - { - "name": "LoraFinetuningConfig", - "description": "" - }, - { - "name": "MemoryRetrievalStep", - "description": "" - }, - { - "name": "Message", - "description": "" - }, - { - "name": "MetricEvent", - "description": "" - }, - { - "name": "Model", - "description": "" - }, - { - "name": "ModelCandidate", - "description": "" - }, - { - "name": "ModelType", - "description": "" - }, { "name": "Models" }, - { - "name": "NumberType", - "description": "" - }, - { - "name": "ObjectType", - "description": "" - }, - { - "name": "OptimizerConfig", - "description": "" - }, - { - "name": "OptimizerType", - "description": "" - }, - { - "name": "PaginatedRowsResult", - "description": "" - }, - { - "name": "ParamType", - "description": "" - }, { "name": "PostTraining (Coming Soon)" }, - { - "name": "PostTrainingJob", - "description": "" - }, - { - "name": "PostTrainingJobArtifactsResponse", - "description": "Artifacts of a finetuning job." - }, - { - "name": "PostTrainingJobStatusResponse", - "description": "Status of a finetuning job." - }, - { - "name": "PreferenceOptimizeRequest", - "description": "" - }, - { - "name": "ProviderInfo", - "description": "" - }, - { - "name": "QATFinetuningConfig", - "description": "" - }, - { - "name": "QueryChunksRequest", - "description": "" - }, - { - "name": "QueryChunksResponse", - "description": "" - }, - { - "name": "QueryCondition", - "description": "" - }, - { - "name": "QueryConditionOp", - "description": "" - }, - { - "name": "QueryRequest", - "description": "" - }, - { - "name": "QuerySpanTreeResponse", - "description": "" - }, - { - "name": "QuerySpansResponse", - "description": "" - }, - { - "name": "QueryTracesResponse", - "description": "" - }, - { - "name": "RAGDocument", - "description": "" - }, - { - "name": "RAGQueryConfig", - "description": "" - }, - { - "name": "RAGQueryGeneratorConfig", - "description": "" - }, - { - "name": "RAGQueryResult", - "description": "" - }, - { - "name": "RegexParserScoringFnParams", - "description": "" - }, - { - "name": "RegisterDatasetRequest", - "description": "" - }, - { - "name": "RegisterEvalTaskRequest", - "description": "" - }, - { - "name": "RegisterModelRequest", - "description": "" - }, - { - "name": "RegisterScoringFunctionRequest", - "description": "" - }, - { - "name": "RegisterShieldRequest", - "description": "" - }, - { - "name": "RegisterToolGroupRequest", - "description": "" - }, - { - "name": "RegisterVectorDbRequest", - "description": "" - }, - { - "name": "ResponseFormat", - "description": "" - }, - { - "name": "RouteInfo", - "description": "" - }, - { - "name": "RunEvalRequest", - "description": "" - }, - { - "name": "RunShieldRequest", - "description": "" - }, - { - "name": "RunShieldResponse", - "description": "" - }, { "name": "Safety" }, - { - "name": "SafetyViolation", - "description": "" - }, - { - "name": "SamplingParams", - "description": "" - }, - { - "name": "SamplingStrategy", - "description": "" - }, - { - "name": "SaveSpansToDatasetRequest", - "description": "" - }, - { - "name": "ScoreBatchRequest", - "description": "" - }, - { - "name": "ScoreBatchResponse", - "description": "" - }, - { - "name": "ScoreRequest", - "description": "" - }, - { - "name": "ScoreResponse", - "description": "" - }, { "name": "Scoring" }, - { - "name": "ScoringFn", - "description": "" - }, - { - "name": "ScoringFnParams", - "description": "" - }, { "name": "ScoringFunctions" }, - { - "name": "ScoringResult", - "description": "" - }, - { - "name": "Session", - "description": "A single session of an interaction with an Agentic System." - }, - { - "name": "Shield", - "description": "A safety shield resource that can be used to check content" - }, - { - "name": "ShieldCallStep", - "description": "" - }, { "name": "Shields" }, - { - "name": "Span", - "description": "" - }, - { - "name": "SpanEndPayload", - "description": "" - }, - { - "name": "SpanStartPayload", - "description": "" - }, - { - "name": "SpanStatus", - "description": "" - }, - { - "name": "SpanWithStatus", - "description": "" - }, - { - "name": "StringType", - "description": "" - }, - { - "name": "StructuredLogEvent", - "description": "" - }, - { - "name": "StructuredLogPayload", - "description": "" - }, - { - "name": "SupervisedFineTuneRequest", - "description": "" - }, - { - "name": "SyntheticDataGenerateRequest", - "description": "" - }, { "name": "SyntheticDataGeneration (Coming Soon)" }, - { - "name": "SyntheticDataGenerationResponse", - "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." - }, - { - "name": "SystemMessage", - "description": "A system message providing instructions or context to the model." - }, { "name": "Telemetry" }, - { - "name": "TextContentItem", - "description": "" - }, - { - "name": "TextDelta", - "description": "" - }, - { - "name": "TokenLogProbs", - "description": "Log probabilities for generated tokens." - }, - { - "name": "Tool", - "description": "" - }, - { - "name": "ToolCall", - "description": "" - }, - { - "name": "ToolCallDelta", - "description": "" - }, - { - "name": "ToolDef", - "description": "" - }, - { - "name": "ToolDefinition", - "description": "" - }, - { - "name": "ToolExecutionStep", - "description": "" - }, - { - "name": "ToolGroup", - "description": "" - }, { "name": "ToolGroups" }, - { - "name": "ToolHost", - "description": "" - }, - { - "name": "ToolInvocationResult", - "description": "" - }, - { - "name": "ToolParamDefinition", - "description": "" - }, - { - "name": "ToolParameter", - "description": "" - }, - { - "name": "ToolResponse", - "description": "" - }, - { - "name": "ToolResponseMessage", - "description": "A message representing the result of a tool invocation." - }, { "name": "ToolRuntime" }, - { - "name": "TopKSamplingStrategy", - "description": "" - }, - { - "name": "TopPSamplingStrategy", - "description": "" - }, - { - "name": "Trace", - "description": "" - }, - { - "name": "TrainingConfig", - "description": "" - }, - { - "name": "Turn", - "description": "A single turn in an interaction with an Agentic System." - }, - { - "name": "URL", - "description": "" - }, - { - "name": "UnionType", - "description": "" - }, - { - "name": "UnstructuredLogEvent", - "description": "" - }, - { - "name": "UserMessage", - "description": "A message from the user in a chat conversation." - }, - { - "name": "VectorDB", - "description": "" - }, { "name": "VectorDBs" }, { "name": "VectorIO" - }, - { - "name": "VersionInfo", - "description": "" - }, - { - "name": "ViolationLevel", - "description": "" } ], "x-tagGroups": [ @@ -8667,196 +7929,6 @@ "VectorDBs", "VectorIO" ] - }, - { - "name": "Types", - "tags": [ - "AgentCandidate", - "AgentConfig", - "AgentCreateResponse", - "AgentSessionCreateResponse", - "AgentStepResponse", - "AgentTool", - "AgentTurnInputType", - "AgentTurnResponseEvent", - "AgentTurnResponseEventPayload", - "AgentTurnResponseStepCompletePayload", - "AgentTurnResponseStepProgressPayload", - "AgentTurnResponseStepStartPayload", - "AgentTurnResponseStreamChunk", - "AgentTurnResponseTurnCompletePayload", - "AgentTurnResponseTurnStartPayload", - "AggregationFunctionType", - "AlgorithmConfig", - "AppEvalTaskConfig", - "AppendRowsRequest", - "ArrayType", - "BasicScoringFnParams", - "BatchChatCompletionRequest", - "BatchChatCompletionResponse", - "BatchCompletionRequest", - "BatchCompletionResponse", - "BenchmarkEvalTaskConfig", - "BooleanType", - "CancelTrainingJobRequest", - "ChatCompletionInputType", - "ChatCompletionRequest", - "ChatCompletionResponse", - "ChatCompletionResponseEvent", - "ChatCompletionResponseStreamChunk", - "Checkpoint", - "CompletionInputType", - "CompletionMessage", - "CompletionRequest", - "CompletionResponse", - "CompletionResponseStreamChunk", - "ContentDelta", - "CreateAgentRequest", - "CreateAgentSessionRequest", - "CreateAgentTurnRequest", - "DPOAlignmentConfig", - "DataConfig", - "Dataset", - "DatasetFormat", - "DefaultRAGQueryGeneratorConfig", - "EfficiencyConfig", - "EmbeddingsRequest", - "EmbeddingsResponse", - "EvalCandidate", - "EvalTask", - "EvalTaskConfig", - "EvaluateResponse", - "EvaluateRowsRequest", - "Event", - "GrammarResponseFormat", - "GreedySamplingStrategy", - "HealthInfo", - "ImageContentItem", - "ImageDelta", - "InferenceStep", - "InsertChunksRequest", - "InsertRequest", - "InterleavedContent", - "InterleavedContentItem", - "InvokeToolRequest", - "Job", - "JobStatus", - "JsonSchemaResponseFormat", - "JsonType", - "LLMAsJudgeScoringFnParams", - "LLMRAGQueryGeneratorConfig", - "ListDatasetsResponse", - "ListEvalTasksResponse", - "ListModelsResponse", - "ListPostTrainingJobsResponse", - "ListProvidersResponse", - "ListRoutesResponse", - "ListScoringFunctionsResponse", - "ListShieldsResponse", - "ListToolGroupsResponse", - "ListToolsResponse", - "ListVectorDBsResponse", - "LogEventRequest", - "LogSeverity", - "LoraFinetuningConfig", - "MemoryRetrievalStep", - "Message", - "MetricEvent", - "Model", - "ModelCandidate", - "ModelType", - "NumberType", - "ObjectType", - "OptimizerConfig", - "OptimizerType", - "PaginatedRowsResult", - "ParamType", - "PostTrainingJob", - "PostTrainingJobArtifactsResponse", - "PostTrainingJobStatusResponse", - "PreferenceOptimizeRequest", - "ProviderInfo", - "QATFinetuningConfig", - "QueryChunksRequest", - "QueryChunksResponse", - "QueryCondition", - "QueryConditionOp", - "QueryRequest", - "QuerySpanTreeResponse", - "QuerySpansResponse", - "QueryTracesResponse", - "RAGDocument", - "RAGQueryConfig", - "RAGQueryGeneratorConfig", - "RAGQueryResult", - "RegexParserScoringFnParams", - "RegisterDatasetRequest", - "RegisterEvalTaskRequest", - "RegisterModelRequest", - "RegisterScoringFunctionRequest", - "RegisterShieldRequest", - "RegisterToolGroupRequest", - "RegisterVectorDbRequest", - "ResponseFormat", - "RouteInfo", - "RunEvalRequest", - "RunShieldRequest", - "RunShieldResponse", - "SafetyViolation", - "SamplingParams", - "SamplingStrategy", - "SaveSpansToDatasetRequest", - "ScoreBatchRequest", - "ScoreBatchResponse", - "ScoreRequest", - "ScoreResponse", - "ScoringFn", - "ScoringFnParams", - "ScoringResult", - "Session", - "Shield", - "ShieldCallStep", - "Span", - "SpanEndPayload", - "SpanStartPayload", - "SpanStatus", - "SpanWithStatus", - "StringType", - "StructuredLogEvent", - "StructuredLogPayload", - "SupervisedFineTuneRequest", - "SyntheticDataGenerateRequest", - "SyntheticDataGenerationResponse", - "SystemMessage", - "TextContentItem", - "TextDelta", - "TokenLogProbs", - "Tool", - "ToolCall", - "ToolCallDelta", - "ToolDef", - "ToolDefinition", - "ToolExecutionStep", - "ToolGroup", - "ToolHost", - "ToolInvocationResult", - "ToolParamDefinition", - "ToolParameter", - "ToolResponse", - "ToolResponseMessage", - "TopKSamplingStrategy", - "TopPSamplingStrategy", - "Trace", - "TrainingConfig", - "Turn", - "URL", - "UnionType", - "UnstructuredLogEvent", - "UserMessage", - "VectorDB", - "VersionInfo", - "ViolationLevel" - ] } ] }; diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index a83b3afe5..0734ef236 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -1817,9 +1817,7 @@ components: - function_tag - python_list title: >- - This Enum refers to the prompt format for calling custom / zero shot tools - description: >- - The detailed prompts for each of these formats are added to llama cli + Prompt format for calling custom / zero shot tools. response_format: $ref: '#/components/schemas/ResponseFormat' logprobs: @@ -2225,9 +2223,7 @@ components: - function_tag - python_list title: >- - This Enum refers to the prompt format for calling custom / zero shot tools - description: >- - The detailed prompts for each of these formats are added to llama cli + Prompt format for calling custom / zero shot tools. max_infer_iters: type: integer default: 10 @@ -4905,411 +4901,54 @@ components: security: - Default: [] tags: - - name: AgentCandidate - description: '' - - name: AgentConfig - description: '' - - name: AgentCreateResponse - description: '' - - name: AgentSessionCreateResponse - description: '' - - name: AgentStepResponse - description: '' - - name: AgentTool - description: '' - - name: AgentTurnInputType - description: '' - - name: AgentTurnResponseEvent - description: '' - - name: AgentTurnResponseEventPayload - description: '' - - name: AgentTurnResponseStepCompletePayload - description: '' - - name: AgentTurnResponseStepProgressPayload - description: '' - - name: AgentTurnResponseStepStartPayload - description: '' - - name: AgentTurnResponseStreamChunk - description: streamed agent turn completion response. - - name: AgentTurnResponseTurnCompletePayload - description: '' - - name: AgentTurnResponseTurnStartPayload - description: '' - name: Agents - - name: AggregationFunctionType - description: '' - - name: AlgorithmConfig - description: '' - - name: AppEvalTaskConfig - description: '' - - name: AppendRowsRequest - description: '' - - name: ArrayType - description: '' - - name: BasicScoringFnParams - description: '' - - name: BatchChatCompletionRequest - description: '' - - name: BatchChatCompletionResponse - description: '' - - name: BatchCompletionRequest - description: '' - - name: BatchCompletionResponse - description: '' + description: >- + Main functionalities provided by this API: + + - Create agents with specific instructions and ability to use tools. + + - Interactions with agents are grouped into sessions ("threads"), and each interaction + is called a "turn". + + - Agents can be provided with various tools (see the ToolGroups and ToolRuntime + APIs for more details). + + - Agents can be provided with various shields (see the Safety API for more details). + + - Agents can also use Memory to retrieve information from knowledge bases. See + the RAG Tool and Vector IO APIs for more details. + x-displayName: >- + Agents API for creating and interacting with agentic systems. - name: BatchInference (Coming Soon) - - name: BenchmarkEvalTaskConfig - description: '' - - name: BooleanType - description: '' - - name: CancelTrainingJobRequest - description: '' - - name: ChatCompletionInputType - description: '' - - name: ChatCompletionRequest - description: '' - - name: ChatCompletionResponse - description: Response from a chat completion request. - - name: ChatCompletionResponseEvent - description: >- - An event during chat completion generation. - - name: ChatCompletionResponseStreamChunk - description: >- - A chunk of a streamed chat completion response. - - name: Checkpoint - description: Checkpoint created during training runs - - name: CompletionInputType - description: '' - - name: CompletionMessage - description: >- - A message containing the model's (assistant) response in a chat conversation. - - name: CompletionRequest - description: '' - - name: CompletionResponse - description: Response from a completion request. - - name: CompletionResponseStreamChunk - description: >- - A chunk of a streamed completion response. - - name: ContentDelta - description: '' - - name: CreateAgentRequest - description: '' - - name: CreateAgentSessionRequest - description: '' - - name: CreateAgentTurnRequest - description: '' - - name: DPOAlignmentConfig - description: '' - - name: DataConfig - description: '' - - name: Dataset - description: '' - - name: DatasetFormat - description: '' - name: DatasetIO - name: Datasets - - name: DefaultRAGQueryGeneratorConfig - description: '' - - name: EfficiencyConfig - description: '' - - name: EmbeddingsRequest - description: '' - - name: EmbeddingsResponse - description: >- - Response containing generated embeddings. - name: Eval - - name: EvalCandidate - description: '' - - name: EvalTask - description: '' - - name: EvalTaskConfig - description: '' - name: EvalTasks - - name: EvaluateResponse - description: '' - - name: EvaluateRowsRequest - description: '' - - name: Event - description: '' - - name: GrammarResponseFormat - description: >- - Configuration for grammar-guided response generation. - - name: GreedySamplingStrategy - description: '' - - name: HealthInfo - description: '' - - name: ImageContentItem - description: '' - - name: ImageDelta - description: '' - name: Inference - - name: InferenceStep - description: '' - - name: InsertChunksRequest - description: '' - - name: InsertRequest - description: '' + description: >- + This API provides the raw interface to the underlying models. Two kinds of models + are supported: + + - LLM models: these models generate "raw" and "chat" (conversational) completions. + + - Embedding models: these models generate embeddings to be used for semantic + search. + x-displayName: >- + Llama Stack Inference API for generating completions, chat completions, and + embeddings. - name: Inspect - - name: InterleavedContent - description: '' - - name: InterleavedContentItem - description: '' - - name: InvokeToolRequest - description: '' - - name: Job - description: '' - - name: JobStatus - description: '' - - name: JsonSchemaResponseFormat - description: >- - Configuration for JSON schema-guided response generation. - - name: JsonType - description: '' - - name: LLMAsJudgeScoringFnParams - description: '' - - name: LLMRAGQueryGeneratorConfig - description: '' - - name: ListDatasetsResponse - description: '' - - name: ListEvalTasksResponse - description: '' - - name: ListModelsResponse - description: '' - - name: ListPostTrainingJobsResponse - description: '' - - name: ListProvidersResponse - description: '' - - name: ListRoutesResponse - description: '' - - name: ListScoringFunctionsResponse - description: '' - - name: ListShieldsResponse - description: '' - - name: ListToolGroupsResponse - description: '' - - name: ListToolsResponse - description: '' - - name: ListVectorDBsResponse - description: '' - - name: LogEventRequest - description: '' - - name: LogSeverity - description: '' - - name: LoraFinetuningConfig - description: '' - - name: MemoryRetrievalStep - description: '' - - name: Message - description: '' - - name: MetricEvent - description: '' - - name: Model - description: '' - - name: ModelCandidate - description: '' - - name: ModelType - description: '' - name: Models - - name: NumberType - description: '' - - name: ObjectType - description: '' - - name: OptimizerConfig - description: '' - - name: OptimizerType - description: '' - - name: PaginatedRowsResult - description: '' - - name: ParamType - description: '' - name: PostTraining (Coming Soon) - - name: PostTrainingJob - description: '' - - name: PostTrainingJobArtifactsResponse - description: Artifacts of a finetuning job. - - name: PostTrainingJobStatusResponse - description: Status of a finetuning job. - - name: PreferenceOptimizeRequest - description: '' - - name: ProviderInfo - description: '' - - name: QATFinetuningConfig - description: '' - - name: QueryChunksRequest - description: '' - - name: QueryChunksResponse - description: '' - - name: QueryCondition - description: '' - - name: QueryConditionOp - description: '' - - name: QueryRequest - description: '' - - name: QuerySpanTreeResponse - description: '' - - name: QuerySpansResponse - description: '' - - name: QueryTracesResponse - description: '' - - name: RAGDocument - description: '' - - name: RAGQueryConfig - description: '' - - name: RAGQueryGeneratorConfig - description: '' - - name: RAGQueryResult - description: '' - - name: RegexParserScoringFnParams - description: '' - - name: RegisterDatasetRequest - description: '' - - name: RegisterEvalTaskRequest - description: '' - - name: RegisterModelRequest - description: '' - - name: RegisterScoringFunctionRequest - description: '' - - name: RegisterShieldRequest - description: '' - - name: RegisterToolGroupRequest - description: '' - - name: RegisterVectorDbRequest - description: '' - - name: ResponseFormat - description: '' - - name: RouteInfo - description: '' - - name: RunEvalRequest - description: '' - - name: RunShieldRequest - description: '' - - name: RunShieldResponse - description: '' - name: Safety - - name: SafetyViolation - description: '' - - name: SamplingParams - description: '' - - name: SamplingStrategy - description: '' - - name: SaveSpansToDatasetRequest - description: '' - - name: ScoreBatchRequest - description: '' - - name: ScoreBatchResponse - description: '' - - name: ScoreRequest - description: '' - - name: ScoreResponse - description: '' - name: Scoring - - name: ScoringFn - description: '' - - name: ScoringFnParams - description: '' - name: ScoringFunctions - - name: ScoringResult - description: '' - - name: Session - description: >- - A single session of an interaction with an Agentic System. - - name: Shield - description: >- - A safety shield resource that can be used to check content - - name: ShieldCallStep - description: '' - name: Shields - - name: Span - description: '' - - name: SpanEndPayload - description: '' - - name: SpanStartPayload - description: '' - - name: SpanStatus - description: '' - - name: SpanWithStatus - description: '' - - name: StringType - description: '' - - name: StructuredLogEvent - description: '' - - name: StructuredLogPayload - description: '' - - name: SupervisedFineTuneRequest - description: '' - - name: SyntheticDataGenerateRequest - description: '' - name: SyntheticDataGeneration (Coming Soon) - - name: SyntheticDataGenerationResponse - description: >- - Response from the synthetic data generation. Batch of (prompt, response, score) - tuples that pass the threshold. - - name: SystemMessage - description: >- - A system message providing instructions or context to the model. - name: Telemetry - - name: TextContentItem - description: '' - - name: TextDelta - description: '' - - name: TokenLogProbs - description: Log probabilities for generated tokens. - - name: Tool - description: '' - - name: ToolCall - description: '' - - name: ToolCallDelta - description: '' - - name: ToolDef - description: '' - - name: ToolDefinition - description: '' - - name: ToolExecutionStep - description: '' - - name: ToolGroup - description: '' - name: ToolGroups - - name: ToolHost - description: '' - - name: ToolInvocationResult - description: '' - - name: ToolParamDefinition - description: '' - - name: ToolParameter - description: '' - - name: ToolResponse - description: '' - - name: ToolResponseMessage - description: >- - A message representing the result of a tool invocation. - name: ToolRuntime - - name: TopKSamplingStrategy - description: '' - - name: TopPSamplingStrategy - description: '' - - name: Trace - description: '' - - name: TrainingConfig - description: '' - - name: Turn - description: >- - A single turn in an interaction with an Agentic System. - - name: URL - description: '' - - name: UnionType - description: '' - - name: UnstructuredLogEvent - description: '' - - name: UserMessage - description: >- - A message from the user in a chat conversation. - - name: VectorDB - description: '' - name: VectorDBs - name: VectorIO - - name: VersionInfo - description: '' - - name: ViolationLevel - description: '' x-tagGroups: - name: Operations tags: @@ -5333,190 +4972,3 @@ x-tagGroups: - ToolRuntime - VectorDBs - VectorIO - - name: Types - tags: - - AgentCandidate - - AgentConfig - - AgentCreateResponse - - AgentSessionCreateResponse - - AgentStepResponse - - AgentTool - - AgentTurnInputType - - AgentTurnResponseEvent - - AgentTurnResponseEventPayload - - AgentTurnResponseStepCompletePayload - - AgentTurnResponseStepProgressPayload - - AgentTurnResponseStepStartPayload - - AgentTurnResponseStreamChunk - - AgentTurnResponseTurnCompletePayload - - AgentTurnResponseTurnStartPayload - - AggregationFunctionType - - AlgorithmConfig - - AppEvalTaskConfig - - AppendRowsRequest - - ArrayType - - BasicScoringFnParams - - BatchChatCompletionRequest - - BatchChatCompletionResponse - - BatchCompletionRequest - - BatchCompletionResponse - - BenchmarkEvalTaskConfig - - BooleanType - - CancelTrainingJobRequest - - ChatCompletionInputType - - ChatCompletionRequest - - ChatCompletionResponse - - ChatCompletionResponseEvent - - ChatCompletionResponseStreamChunk - - Checkpoint - - CompletionInputType - - CompletionMessage - - CompletionRequest - - CompletionResponse - - CompletionResponseStreamChunk - - ContentDelta - - CreateAgentRequest - - CreateAgentSessionRequest - - CreateAgentTurnRequest - - DPOAlignmentConfig - - DataConfig - - Dataset - - DatasetFormat - - DefaultRAGQueryGeneratorConfig - - EfficiencyConfig - - EmbeddingsRequest - - EmbeddingsResponse - - EvalCandidate - - EvalTask - - EvalTaskConfig - - EvaluateResponse - - EvaluateRowsRequest - - Event - - GrammarResponseFormat - - GreedySamplingStrategy - - HealthInfo - - ImageContentItem - - ImageDelta - - InferenceStep - - InsertChunksRequest - - InsertRequest - - InterleavedContent - - InterleavedContentItem - - InvokeToolRequest - - Job - - JobStatus - - JsonSchemaResponseFormat - - JsonType - - LLMAsJudgeScoringFnParams - - LLMRAGQueryGeneratorConfig - - ListDatasetsResponse - - ListEvalTasksResponse - - ListModelsResponse - - ListPostTrainingJobsResponse - - ListProvidersResponse - - ListRoutesResponse - - ListScoringFunctionsResponse - - ListShieldsResponse - - ListToolGroupsResponse - - ListToolsResponse - - ListVectorDBsResponse - - LogEventRequest - - LogSeverity - - LoraFinetuningConfig - - MemoryRetrievalStep - - Message - - MetricEvent - - Model - - ModelCandidate - - ModelType - - NumberType - - ObjectType - - OptimizerConfig - - OptimizerType - - PaginatedRowsResult - - ParamType - - PostTrainingJob - - PostTrainingJobArtifactsResponse - - PostTrainingJobStatusResponse - - PreferenceOptimizeRequest - - ProviderInfo - - QATFinetuningConfig - - QueryChunksRequest - - QueryChunksResponse - - QueryCondition - - QueryConditionOp - - QueryRequest - - QuerySpanTreeResponse - - QuerySpansResponse - - QueryTracesResponse - - RAGDocument - - RAGQueryConfig - - RAGQueryGeneratorConfig - - RAGQueryResult - - RegexParserScoringFnParams - - RegisterDatasetRequest - - RegisterEvalTaskRequest - - RegisterModelRequest - - RegisterScoringFunctionRequest - - RegisterShieldRequest - - RegisterToolGroupRequest - - RegisterVectorDbRequest - - ResponseFormat - - RouteInfo - - RunEvalRequest - - RunShieldRequest - - RunShieldResponse - - SafetyViolation - - SamplingParams - - SamplingStrategy - - SaveSpansToDatasetRequest - - ScoreBatchRequest - - ScoreBatchResponse - - ScoreRequest - - ScoreResponse - - ScoringFn - - ScoringFnParams - - ScoringResult - - Session - - Shield - - ShieldCallStep - - Span - - SpanEndPayload - - SpanStartPayload - - SpanStatus - - SpanWithStatus - - StringType - - StructuredLogEvent - - StructuredLogPayload - - SupervisedFineTuneRequest - - SyntheticDataGenerateRequest - - SyntheticDataGenerationResponse - - SystemMessage - - TextContentItem - - TextDelta - - TokenLogProbs - - Tool - - ToolCall - - ToolCallDelta - - ToolDef - - ToolDefinition - - ToolExecutionStep - - ToolGroup - - ToolHost - - ToolInvocationResult - - ToolParamDefinition - - ToolParameter - - ToolResponse - - ToolResponseMessage - - TopKSamplingStrategy - - TopPSamplingStrategy - - Trace - - TrainingConfig - - Turn - - URL - - UnionType - - UnstructuredLogEvent - - UserMessage - - VectorDB - - VersionInfo - - ViolationLevel diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index d41abc846..68eecaccb 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -297,6 +297,16 @@ class AgentStepResponse(BaseModel): @runtime_checkable @trace_protocol class Agents(Protocol): + """Agents API for creating and interacting with agentic systems. + + Main functionalities provided by this API: + - Create agents with specific instructions and ability to use tools. + - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". + - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). + - Agents can be provided with various shields (see the Safety API for more details). + - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. + """ + @webmethod(route="/agents", method="POST") async def create_agent( self, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index a163943eb..2debce1a7 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -362,6 +362,13 @@ class ModelStore(Protocol): @runtime_checkable @trace_protocol class Inference(Protocol): + """Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + """ + model_store: ModelStore @webmethod(route="/inference/completion", method="POST")