mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-25 21:57:45 +00:00
Merge 48c5d089c6
into 226b877ca6
This commit is contained in:
commit
14ff5d79b7
28 changed files with 4079 additions and 812 deletions
2147
docs/_static/llama-stack-spec.html
vendored
2147
docs/_static/llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
1309
docs/_static/llama-stack-spec.yaml
vendored
1309
docs/_static/llama-stack-spec.yaml
vendored
File diff suppressed because it is too large
Load diff
|
@ -152,7 +152,17 @@ Step = Annotated[
|
|||
|
||||
@json_schema_type
|
||||
class Turn(BaseModel):
|
||||
"""A single turn in an interaction with an Agentic System."""
|
||||
"""A single turn in an interaction with an Agentic System.
|
||||
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param input_messages: List of messages that initiated this turn
|
||||
:param steps: Ordered list of processing steps executed during this turn
|
||||
:param output_message: The model's generated response containing content and metadata
|
||||
:param output_attachments: (Optional) Files or media attached to the agent's response
|
||||
:param started_at: Timestamp when the turn began
|
||||
:param completed_at: (Optional) Timestamp when the turn finished, if completed
|
||||
"""
|
||||
|
||||
turn_id: str
|
||||
session_id: str
|
||||
|
@ -167,7 +177,13 @@ class Turn(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Session(BaseModel):
|
||||
"""A single session of an interaction with an Agentic System."""
|
||||
"""A single session of an interaction with an Agentic System.
|
||||
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param session_name: Human-readable name for the session
|
||||
:param turns: List of all turns that have occurred in this session
|
||||
:param started_at: Timestamp when the session was created
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
session_name: str
|
||||
|
@ -232,6 +248,13 @@ class AgentConfig(AgentConfigCommon):
|
|||
|
||||
@json_schema_type
|
||||
class Agent(BaseModel):
|
||||
"""An agent instance with configuration and metadata.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param agent_config: Configuration settings for the agent
|
||||
:param created_at: Timestamp when the agent was created
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
agent_config: AgentConfig
|
||||
created_at: datetime
|
||||
|
@ -253,6 +276,14 @@ class AgentTurnResponseEventType(StrEnum):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepStartPayload(BaseModel):
|
||||
"""Payload for step start events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param metadata: (Optional) Additional metadata for the step
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
|
@ -261,6 +292,14 @@ class AgentTurnResponseStepStartPayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepCompletePayload(BaseModel):
|
||||
"""Payload for step completion events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param step_details: Complete details of the executed step
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
|
@ -269,6 +308,14 @@ class AgentTurnResponseStepCompletePayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepProgressPayload(BaseModel):
|
||||
"""Payload for step progress events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param step_type: Type of step being executed
|
||||
:param step_id: Unique identifier for the step within a turn
|
||||
:param delta: Incremental content changes during step execution
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
|
||||
|
@ -280,18 +327,36 @@ class AgentTurnResponseStepProgressPayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnStartPayload(BaseModel):
|
||||
"""Payload for turn start events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
|
||||
turn_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnCompletePayload(BaseModel):
|
||||
"""Payload for turn completion events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn: Complete turn data including all steps and results
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
|
||||
turn: Turn
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
|
||||
"""Payload for turn awaiting input events in agent turn responses.
|
||||
|
||||
:param event_type: Type of event being reported
|
||||
:param turn: Turn data when waiting for external tool responses
|
||||
"""
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
|
||||
turn: Turn
|
||||
|
||||
|
@ -310,21 +375,47 @@ register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPaylo
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseEvent(BaseModel):
|
||||
"""An event in an agent turn response stream.
|
||||
|
||||
:param payload: Event-specific payload containing event data
|
||||
"""
|
||||
|
||||
payload: AgentTurnResponseEventPayload
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentCreateResponse(BaseModel):
|
||||
"""Response returned when creating a new agent.
|
||||
|
||||
:param agent_id: Unique identifier for the created agent
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentSessionCreateResponse(BaseModel):
|
||||
"""Response returned when creating a new agent session.
|
||||
|
||||
:param session_id: Unique identifier for the created session
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||
"""Request to create a new turn for an agent.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param messages: List of messages to start the turn with
|
||||
:param documents: (Optional) List of documents to provide to the agent
|
||||
:param toolgroups: (Optional) List of tool groups to make available for this turn
|
||||
:param stream: (Optional) Whether to stream the response
|
||||
:param tool_config: (Optional) Tool configuration to override agent defaults
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
session_id: str
|
||||
|
||||
|
@ -342,6 +433,15 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResumeRequest(BaseModel):
|
||||
"""Request to resume an agent turn with tool responses.
|
||||
|
||||
:param agent_id: Unique identifier for the agent
|
||||
:param session_id: Unique identifier for the conversation session
|
||||
:param turn_id: Unique identifier for the turn within a session
|
||||
:param tool_responses: List of tool responses to submit to continue the turn
|
||||
:param stream: (Optional) Whether to stream the response
|
||||
"""
|
||||
|
||||
agent_id: str
|
||||
session_id: str
|
||||
turn_id: str
|
||||
|
@ -351,13 +451,21 @@ class AgentTurnResumeRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStreamChunk(BaseModel):
|
||||
"""streamed agent turn completion response."""
|
||||
"""Streamed agent turn completion response.
|
||||
|
||||
:param event: Individual event in the agent turn response stream
|
||||
"""
|
||||
|
||||
event: AgentTurnResponseEvent
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentStepResponse(BaseModel):
|
||||
"""Response containing details of a specific agent step.
|
||||
|
||||
:param step: The complete step data and execution details
|
||||
"""
|
||||
|
||||
step: Step
|
||||
|
||||
|
||||
|
|
|
@ -18,18 +18,37 @@ from llama_stack.schema_utils import json_schema_type, register_schema
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseError(BaseModel):
|
||||
"""Error details for failed OpenAI response requests.
|
||||
|
||||
:param code: Error code identifying the type of failure
|
||||
:param message: Human-readable error message describing the failure
|
||||
"""
|
||||
|
||||
code: str
|
||||
message: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputMessageContentText(BaseModel):
|
||||
"""Text content for input messages in OpenAI response format.
|
||||
|
||||
:param text: The text content of the input message
|
||||
:param type: Content type identifier, always "input_text"
|
||||
"""
|
||||
|
||||
text: str
|
||||
type: Literal["input_text"] = "input_text"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputMessageContentImage(BaseModel):
|
||||
"""Image content for input messages in OpenAI response format.
|
||||
|
||||
:param detail: Level of detail for image processing, can be "low", "high", or "auto"
|
||||
:param type: Content type identifier, always "input_image"
|
||||
:param image_url: (Optional) URL of the image content
|
||||
"""
|
||||
|
||||
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
|
||||
type: Literal["input_image"] = "input_image"
|
||||
# TODO: handle file_id
|
||||
|
@ -46,6 +65,14 @@ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMess
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationFileCitation(BaseModel):
|
||||
"""File citation annotation for referencing specific files in response content.
|
||||
|
||||
:param type: Annotation type identifier, always "file_citation"
|
||||
:param file_id: Unique identifier of the referenced file
|
||||
:param filename: Name of the referenced file
|
||||
:param index: Position index of the citation within the content
|
||||
"""
|
||||
|
||||
type: Literal["file_citation"] = "file_citation"
|
||||
file_id: str
|
||||
filename: str
|
||||
|
@ -54,6 +81,15 @@ class OpenAIResponseAnnotationFileCitation(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationCitation(BaseModel):
|
||||
"""URL citation annotation for referencing external web resources.
|
||||
|
||||
:param type: Annotation type identifier, always "url_citation"
|
||||
:param end_index: End position of the citation span in the content
|
||||
:param start_index: Start position of the citation span in the content
|
||||
:param title: Title of the referenced web resource
|
||||
:param url: URL of the referenced web resource
|
||||
"""
|
||||
|
||||
type: Literal["url_citation"] = "url_citation"
|
||||
end_index: int
|
||||
start_index: int
|
||||
|
@ -122,6 +158,13 @@ class OpenAIResponseMessage(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
|
||||
"""Web search tool call output message for OpenAI responses.
|
||||
|
||||
:param id: Unique identifier for this tool call
|
||||
:param status: Current status of the web search operation
|
||||
:param type: Tool call type identifier, always "web_search_call"
|
||||
"""
|
||||
|
||||
id: str
|
||||
status: str
|
||||
type: Literal["web_search_call"] = "web_search_call"
|
||||
|
@ -129,6 +172,15 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
|
||||
"""File search tool call output message for OpenAI responses.
|
||||
|
||||
:param id: Unique identifier for this tool call
|
||||
:param queries: List of search queries executed
|
||||
:param status: Current status of the file search operation
|
||||
:param type: Tool call type identifier, always "file_search_call"
|
||||
:param results: (Optional) Search results returned by the file search operation
|
||||
"""
|
||||
|
||||
id: str
|
||||
queries: list[str]
|
||||
status: str
|
||||
|
@ -138,6 +190,16 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
|
||||
"""Function tool call output message for OpenAI responses.
|
||||
|
||||
:param call_id: Unique identifier for the function call
|
||||
:param name: Name of the function being called
|
||||
:param arguments: JSON string containing the function arguments
|
||||
:param type: Tool call type identifier, always "function_call"
|
||||
:param id: (Optional) Additional identifier for the tool call
|
||||
:param status: (Optional) Current status of the function call execution
|
||||
"""
|
||||
|
||||
call_id: str
|
||||
name: str
|
||||
arguments: str
|
||||
|
@ -148,6 +210,17 @@ class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageMCPCall(BaseModel):
|
||||
"""Model Context Protocol (MCP) call output message for OpenAI responses.
|
||||
|
||||
:param id: Unique identifier for this MCP call
|
||||
:param type: Tool call type identifier, always "mcp_call"
|
||||
:param arguments: JSON string containing the MCP call arguments
|
||||
:param name: Name of the MCP method being called
|
||||
:param server_label: Label identifying the MCP server handling the call
|
||||
:param error: (Optional) Error message if the MCP call failed
|
||||
:param output: (Optional) Output result from the successful MCP call
|
||||
"""
|
||||
|
||||
id: str
|
||||
type: Literal["mcp_call"] = "mcp_call"
|
||||
arguments: str
|
||||
|
@ -158,6 +231,13 @@ class OpenAIResponseOutputMessageMCPCall(BaseModel):
|
|||
|
||||
|
||||
class MCPListToolsTool(BaseModel):
|
||||
"""Tool definition returned by MCP list tools operation.
|
||||
|
||||
:param input_schema: JSON schema defining the tool's input parameters
|
||||
:param name: Name of the tool
|
||||
:param description: (Optional) Description of what the tool does
|
||||
"""
|
||||
|
||||
input_schema: dict[str, Any]
|
||||
name: str
|
||||
description: str | None = None
|
||||
|
@ -165,6 +245,14 @@ class MCPListToolsTool(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageMCPListTools(BaseModel):
|
||||
"""MCP list tools output message containing available tools from an MCP server.
|
||||
|
||||
:param id: Unique identifier for this MCP list tools operation
|
||||
:param type: Tool call type identifier, always "mcp_list_tools"
|
||||
:param server_label: Label identifying the MCP server providing the tools
|
||||
:param tools: List of available tools provided by the MCP server
|
||||
"""
|
||||
|
||||
id: str
|
||||
type: Literal["mcp_list_tools"] = "mcp_list_tools"
|
||||
server_label: str
|
||||
|
@ -206,11 +294,34 @@ class OpenAIResponseTextFormat(TypedDict, total=False):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseText(BaseModel):
|
||||
"""Text response configuration for OpenAI responses.
|
||||
|
||||
:param format: (Optional) Text format configuration specifying output format requirements
|
||||
"""
|
||||
|
||||
format: OpenAIResponseTextFormat | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObject(BaseModel):
|
||||
"""Complete OpenAI response object containing generation results and metadata.
|
||||
|
||||
:param created_at: Unix timestamp when the response was created
|
||||
:param error: (Optional) Error details if the response generation failed
|
||||
:param id: Unique identifier for this response
|
||||
:param model: Model identifier used for generation
|
||||
:param object: Object type identifier, always "response"
|
||||
:param output: List of generated output items (messages, tool calls, etc.)
|
||||
:param parallel_tool_calls: Whether tool calls can be executed in parallel
|
||||
:param previous_response_id: (Optional) ID of the previous response in a conversation
|
||||
:param status: Current status of the response generation
|
||||
:param temperature: (Optional) Sampling temperature used for generation
|
||||
:param text: Text formatting configuration for the response
|
||||
:param top_p: (Optional) Nucleus sampling parameter used for generation
|
||||
:param truncation: (Optional) Truncation strategy applied to the response
|
||||
:param user: (Optional) User identifier associated with the request
|
||||
"""
|
||||
|
||||
created_at: int
|
||||
error: OpenAIResponseError | None = None
|
||||
id: str
|
||||
|
@ -231,6 +342,13 @@ class OpenAIResponseObject(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIDeleteResponseObject(BaseModel):
|
||||
"""Response object confirming deletion of an OpenAI response.
|
||||
|
||||
:param id: Unique identifier of the deleted response
|
||||
:param object: Object type identifier, always "response"
|
||||
:param deleted: Deletion confirmation flag, always True
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: Literal["response"] = "response"
|
||||
deleted: bool = True
|
||||
|
@ -238,18 +356,39 @@ class OpenAIDeleteResponseObject(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
|
||||
"""Streaming event indicating a new response has been created.
|
||||
|
||||
:param response: The newly created response object
|
||||
:param type: Event type identifier, always "response.created"
|
||||
"""
|
||||
|
||||
response: OpenAIResponseObject
|
||||
type: Literal["response.created"] = "response.created"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
|
||||
"""Streaming event indicating a response has been completed.
|
||||
|
||||
:param response: The completed response object
|
||||
:param type: Event type identifier, always "response.completed"
|
||||
"""
|
||||
|
||||
response: OpenAIResponseObject
|
||||
type: Literal["response.completed"] = "response.completed"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
|
||||
"""Streaming event for when a new output item is added to the response.
|
||||
|
||||
:param response_id: Unique identifier of the response containing this output
|
||||
:param item: The output item that was added (message, tool call, etc.)
|
||||
:param output_index: Index position of this item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.output_item.added"
|
||||
"""
|
||||
|
||||
response_id: str
|
||||
item: OpenAIResponseOutput
|
||||
output_index: int
|
||||
|
@ -259,6 +398,15 @@ class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
|
||||
"""Streaming event for when an output item is completed.
|
||||
|
||||
:param response_id: Unique identifier of the response containing this output
|
||||
:param item: The completed output item (message, tool call, etc.)
|
||||
:param output_index: Index position of this item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.output_item.done"
|
||||
"""
|
||||
|
||||
response_id: str
|
||||
item: OpenAIResponseOutput
|
||||
output_index: int
|
||||
|
@ -268,6 +416,16 @@ class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
|
||||
"""Streaming event for incremental text content updates.
|
||||
|
||||
:param content_index: Index position within the text content
|
||||
:param delta: Incremental text content being added
|
||||
:param item_id: Unique identifier of the output item being updated
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.output_text.delta"
|
||||
"""
|
||||
|
||||
content_index: int
|
||||
delta: str
|
||||
item_id: str
|
||||
|
@ -278,6 +436,16 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
|
||||
"""Streaming event for when text output is completed.
|
||||
|
||||
:param content_index: Index position within the text content
|
||||
:param text: Final complete text content of the output item
|
||||
:param item_id: Unique identifier of the completed output item
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.output_text.done"
|
||||
"""
|
||||
|
||||
content_index: int
|
||||
text: str # final text of the output item
|
||||
item_id: str
|
||||
|
@ -288,6 +456,15 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
|
||||
"""Streaming event for incremental function call argument updates.
|
||||
|
||||
:param delta: Incremental function call arguments being added
|
||||
:param item_id: Unique identifier of the function call being updated
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.function_call_arguments.delta"
|
||||
"""
|
||||
|
||||
delta: str
|
||||
item_id: str
|
||||
output_index: int
|
||||
|
@ -297,6 +474,15 @@ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
|
||||
"""Streaming event for when function call arguments are completed.
|
||||
|
||||
:param arguments: Final complete arguments JSON string for the function call
|
||||
:param item_id: Unique identifier of the completed function call
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.function_call_arguments.done"
|
||||
"""
|
||||
|
||||
arguments: str # final arguments of the function call
|
||||
item_id: str
|
||||
output_index: int
|
||||
|
@ -306,6 +492,14 @@ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
|
||||
"""Streaming event for web search calls in progress.
|
||||
|
||||
:param item_id: Unique identifier of the web search call
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.web_search_call.in_progress"
|
||||
"""
|
||||
|
||||
item_id: str
|
||||
output_index: int
|
||||
sequence_number: int
|
||||
|
@ -322,6 +516,14 @@ class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
|
||||
"""Streaming event for completed web search calls.
|
||||
|
||||
:param item_id: Unique identifier of the completed web search call
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.web_search_call.completed"
|
||||
"""
|
||||
|
||||
item_id: str
|
||||
output_index: int
|
||||
sequence_number: int
|
||||
|
@ -366,6 +568,14 @@ class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
|
||||
"""Streaming event for MCP calls in progress.
|
||||
|
||||
:param item_id: Unique identifier of the MCP call
|
||||
:param output_index: Index position of the item in the output list
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.mcp_call.in_progress"
|
||||
"""
|
||||
|
||||
item_id: str
|
||||
output_index: int
|
||||
sequence_number: int
|
||||
|
@ -374,12 +584,24 @@ class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
|
||||
"""Streaming event for failed MCP calls.
|
||||
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.mcp_call.failed"
|
||||
"""
|
||||
|
||||
sequence_number: int
|
||||
type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
|
||||
"""Streaming event for completed MCP calls.
|
||||
|
||||
:param sequence_number: Sequential number for ordering streaming events
|
||||
:param type: Event type identifier, always "response.mcp_call.completed"
|
||||
"""
|
||||
|
||||
sequence_number: int
|
||||
type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
|
||||
|
||||
|
@ -442,6 +664,12 @@ WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_20
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputToolWebSearch(BaseModel):
|
||||
"""Web search tool configuration for OpenAI response inputs.
|
||||
|
||||
:param type: Web search tool type variant to use
|
||||
:param search_context_size: (Optional) Size of search context, must be "low", "medium", or "high"
|
||||
"""
|
||||
|
||||
# Must match values of WebSearchToolTypes above
|
||||
type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
|
||||
"web_search"
|
||||
|
@ -453,6 +681,15 @@ class OpenAIResponseInputToolWebSearch(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputToolFunction(BaseModel):
|
||||
"""Function tool configuration for OpenAI response inputs.
|
||||
|
||||
:param type: Tool type identifier, always "function"
|
||||
:param name: Name of the function that can be called
|
||||
:param description: (Optional) Description of what the function does
|
||||
:param parameters: (Optional) JSON schema defining the function's parameters
|
||||
:param strict: (Optional) Whether to enforce strict parameter validation
|
||||
"""
|
||||
|
||||
type: Literal["function"] = "function"
|
||||
name: str
|
||||
description: str | None = None
|
||||
|
@ -462,6 +699,15 @@ class OpenAIResponseInputToolFunction(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputToolFileSearch(BaseModel):
|
||||
"""File search tool configuration for OpenAI response inputs.
|
||||
|
||||
:param type: Tool type identifier, always "file_search"
|
||||
:param vector_store_ids: List of vector store identifiers to search within
|
||||
:param filters: (Optional) Additional filters to apply to the search
|
||||
:param max_num_results: (Optional) Maximum number of search results to return (1-50)
|
||||
:param ranking_options: (Optional) Options for ranking and scoring search results
|
||||
"""
|
||||
|
||||
type: Literal["file_search"] = "file_search"
|
||||
vector_store_ids: list[str]
|
||||
filters: dict[str, Any] | None = None
|
||||
|
@ -470,16 +716,37 @@ class OpenAIResponseInputToolFileSearch(BaseModel):
|
|||
|
||||
|
||||
class ApprovalFilter(BaseModel):
|
||||
"""Filter configuration for MCP tool approval requirements.
|
||||
|
||||
:param always: (Optional) List of tool names that always require approval
|
||||
:param never: (Optional) List of tool names that never require approval
|
||||
"""
|
||||
|
||||
always: list[str] | None = None
|
||||
never: list[str] | None = None
|
||||
|
||||
|
||||
class AllowedToolsFilter(BaseModel):
|
||||
"""Filter configuration for restricting which MCP tools can be used.
|
||||
|
||||
:param tool_names: (Optional) List of specific tool names that are allowed
|
||||
"""
|
||||
|
||||
tool_names: list[str] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputToolMCP(BaseModel):
|
||||
"""Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
|
||||
|
||||
:param type: Tool type identifier, always "mcp"
|
||||
:param server_label: Label to identify this MCP server
|
||||
:param server_url: URL endpoint of the MCP server
|
||||
:param headers: (Optional) HTTP headers to include when connecting to the server
|
||||
:param require_approval: Approval requirement for tool calls ("always", "never", or filter)
|
||||
:param allowed_tools: (Optional) Restriction on which tools can be used from this server
|
||||
"""
|
||||
|
||||
type: Literal["mcp"] = "mcp"
|
||||
server_label: str
|
||||
server_url: str
|
||||
|
@ -500,17 +767,37 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
|
|||
|
||||
|
||||
class ListOpenAIResponseInputItem(BaseModel):
|
||||
"""List container for OpenAI response input items.
|
||||
|
||||
:param data: List of input items
|
||||
:param object: Object type identifier, always "list"
|
||||
"""
|
||||
|
||||
data: list[OpenAIResponseInput]
|
||||
object: Literal["list"] = "list"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseObjectWithInput(OpenAIResponseObject):
|
||||
"""OpenAI response object extended with input context information.
|
||||
|
||||
:param input: List of input items that led to this response
|
||||
"""
|
||||
|
||||
input: list[OpenAIResponseInput]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListOpenAIResponseObject(BaseModel):
|
||||
"""Paginated list of OpenAI response objects with navigation metadata.
|
||||
|
||||
:param data: List of response objects with their input context
|
||||
:param has_more: Whether there are more results available beyond this page
|
||||
:param first_id: Identifier of the first item in this page
|
||||
:param last_id: Identifier of the last item in this page
|
||||
:param object: Object type identifier, always "list"
|
||||
"""
|
||||
|
||||
data: list[OpenAIResponseObjectWithInput]
|
||||
has_more: bool
|
||||
first_id: str
|
||||
|
|
|
@ -22,6 +22,14 @@ class CommonBenchmarkFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
|
|
|
@ -15,6 +15,11 @@ from llama_stack.schema_utils import json_schema_type, register_schema
|
|||
|
||||
@json_schema_type
|
||||
class URL(BaseModel):
|
||||
"""A URL reference to external content.
|
||||
|
||||
:param uri: The URL string pointing to the resource
|
||||
"""
|
||||
|
||||
uri: str
|
||||
|
||||
|
||||
|
@ -76,17 +81,36 @@ register_schema(InterleavedContent, name="InterleavedContent")
|
|||
|
||||
@json_schema_type
|
||||
class TextDelta(BaseModel):
|
||||
"""A text content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "text"
|
||||
:param text: The incremental text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageDelta(BaseModel):
|
||||
"""An image content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "image"
|
||||
:param image: The incremental image data as bytes
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: bytes
|
||||
|
||||
|
||||
class ToolCallParseStatus(Enum):
|
||||
"""Status of tool call parsing during streaming.
|
||||
:cvar started: Tool call parsing has begun
|
||||
:cvar in_progress: Tool call parsing is ongoing
|
||||
:cvar failed: Tool call parsing failed
|
||||
:cvar succeeded: Tool call parsing completed successfully
|
||||
"""
|
||||
|
||||
started = "started"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
|
@ -95,6 +119,13 @@ class ToolCallParseStatus(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class ToolCallDelta(BaseModel):
|
||||
"""A tool call content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "tool_call"
|
||||
:param tool_call: Either an in-progress tool call string or the final parsed tool call
|
||||
:param parse_status: Current parsing status of the tool call
|
||||
"""
|
||||
|
||||
type: Literal["tool_call"] = "tool_call"
|
||||
|
||||
# you either send an in-progress tool call so the client can stream a long
|
||||
|
|
|
@ -11,6 +11,14 @@ from llama_stack.schema_utils import json_schema_type
|
|||
|
||||
|
||||
class JobStatus(Enum):
|
||||
"""Status of a job execution.
|
||||
:cvar completed: Job has finished successfully
|
||||
:cvar in_progress: Job is currently running
|
||||
:cvar failed: Job has failed during execution
|
||||
:cvar scheduled: Job is scheduled but not yet started
|
||||
:cvar cancelled: Job was cancelled before completion
|
||||
"""
|
||||
|
||||
completed = "completed"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
|
@ -20,5 +28,11 @@ class JobStatus(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class Job(BaseModel):
|
||||
"""A job execution instance with status tracking.
|
||||
|
||||
:param job_id: Unique identifier for the job
|
||||
:param status: Current execution status of the job
|
||||
"""
|
||||
|
||||
job_id: str
|
||||
status: JobStatus
|
||||
|
|
|
@ -13,6 +13,11 @@ from llama_stack.schema_utils import json_schema_type
|
|||
|
||||
|
||||
class Order(Enum):
|
||||
"""Sort order for paginated responses.
|
||||
:cvar asc: Ascending order
|
||||
:cvar desc: Descending order
|
||||
"""
|
||||
|
||||
asc = "asc"
|
||||
desc = "desc"
|
||||
|
||||
|
|
|
@ -13,6 +13,14 @@ from llama_stack.schema_utils import json_schema_type
|
|||
|
||||
@json_schema_type
|
||||
class PostTrainingMetric(BaseModel):
|
||||
"""Training metrics captured during post-training jobs.
|
||||
|
||||
:param epoch: Training epoch number
|
||||
:param train_loss: Loss value on the training dataset
|
||||
:param validation_loss: Loss value on the validation dataset
|
||||
:param perplexity: Perplexity metric indicating model confidence
|
||||
"""
|
||||
|
||||
epoch: int
|
||||
train_loss: float
|
||||
validation_loss: float
|
||||
|
@ -21,7 +29,15 @@ class PostTrainingMetric(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Checkpoint(BaseModel):
|
||||
"""Checkpoint created during training runs"""
|
||||
"""Checkpoint created during training runs.
|
||||
|
||||
:param identifier: Unique identifier for the checkpoint
|
||||
:param created_at: Timestamp when the checkpoint was created
|
||||
:param epoch: Training epoch when the checkpoint was saved
|
||||
:param post_training_job_id: Identifier of the training job that created this checkpoint
|
||||
:param path: File system path where the checkpoint is stored
|
||||
:param training_metrics: (Optional) Training metrics associated with this checkpoint
|
||||
"""
|
||||
|
||||
identifier: str
|
||||
created_at: datetime
|
||||
|
|
|
@ -13,59 +13,114 @@ from llama_stack.schema_utils import json_schema_type, register_schema
|
|||
|
||||
@json_schema_type
|
||||
class StringType(BaseModel):
|
||||
"""Parameter type for string values.
|
||||
|
||||
:param type: Discriminator type. Always "string"
|
||||
"""
|
||||
|
||||
type: Literal["string"] = "string"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class NumberType(BaseModel):
|
||||
"""Parameter type for numeric values.
|
||||
|
||||
:param type: Discriminator type. Always "number"
|
||||
"""
|
||||
|
||||
type: Literal["number"] = "number"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BooleanType(BaseModel):
|
||||
"""Parameter type for boolean values.
|
||||
|
||||
:param type: Discriminator type. Always "boolean"
|
||||
"""
|
||||
|
||||
type: Literal["boolean"] = "boolean"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ArrayType(BaseModel):
|
||||
"""Parameter type for array values.
|
||||
|
||||
:param type: Discriminator type. Always "array"
|
||||
"""
|
||||
|
||||
type: Literal["array"] = "array"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ObjectType(BaseModel):
|
||||
"""Parameter type for object values.
|
||||
|
||||
:param type: Discriminator type. Always "object"
|
||||
"""
|
||||
|
||||
type: Literal["object"] = "object"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class JsonType(BaseModel):
|
||||
"""Parameter type for JSON values.
|
||||
|
||||
:param type: Discriminator type. Always "json"
|
||||
"""
|
||||
|
||||
type: Literal["json"] = "json"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class UnionType(BaseModel):
|
||||
"""Parameter type for union values.
|
||||
|
||||
:param type: Discriminator type. Always "union"
|
||||
"""
|
||||
|
||||
type: Literal["union"] = "union"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionInputType(BaseModel):
|
||||
"""Parameter type for chat completion input.
|
||||
|
||||
:param type: Discriminator type. Always "chat_completion_input"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
type: Literal["chat_completion_input"] = "chat_completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class CompletionInputType(BaseModel):
|
||||
"""Parameter type for completion input.
|
||||
|
||||
:param type: Discriminator type. Always "completion_input"
|
||||
"""
|
||||
|
||||
# expects InterleavedTextMedia for content
|
||||
type: Literal["completion_input"] = "completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnInputType(BaseModel):
|
||||
"""Parameter type for agent turn input.
|
||||
|
||||
:param type: Discriminator type. Always "agent_turn_input"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages (may also include attachments?)
|
||||
type: Literal["agent_turn_input"] = "agent_turn_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DialogType(BaseModel):
|
||||
"""Parameter type for dialog data with semantic output labels.
|
||||
|
||||
:param type: Discriminator type. Always "dialog"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
# this type semantically contains the output label whereas ChatCompletionInputType does not
|
||||
type: Literal["dialog"] = "dialog"
|
||||
|
|
|
@ -94,6 +94,10 @@ register_schema(DataSource, name="DataSource")
|
|||
class CommonDatasetFields(BaseModel):
|
||||
"""
|
||||
Common fields for a dataset.
|
||||
|
||||
:param purpose: Purpose of the dataset indicating its intended use
|
||||
:param source: Data source configuration for the dataset
|
||||
:param metadata: Additional metadata for the dataset
|
||||
"""
|
||||
|
||||
purpose: DatasetPurpose
|
||||
|
@ -106,6 +110,11 @@ class CommonDatasetFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Dataset(CommonDatasetFields, Resource):
|
||||
"""Dataset resource for storing and accessing training or evaluation data.
|
||||
|
||||
:param type: Type of resource, always 'dataset' for datasets
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.dataset] = ResourceType.dataset
|
||||
|
||||
@property
|
||||
|
@ -118,10 +127,20 @@ class Dataset(CommonDatasetFields, Resource):
|
|||
|
||||
|
||||
class DatasetInput(CommonDatasetFields, BaseModel):
|
||||
"""Input parameters for dataset operations.
|
||||
|
||||
:param dataset_id: Unique identifier for the dataset
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
|
||||
|
||||
class ListDatasetsResponse(BaseModel):
|
||||
"""Response from listing datasets.
|
||||
|
||||
:param data: List of datasets
|
||||
"""
|
||||
|
||||
data: list[Dataset]
|
||||
|
||||
|
||||
|
|
|
@ -13,6 +13,29 @@ from llama_stack.schema_utils import json_schema_type
|
|||
|
||||
@json_schema_type
|
||||
class Api(Enum):
|
||||
"""Enumeration of all available APIs in the Llama Stack system.
|
||||
:cvar providers: Provider management and configuration
|
||||
:cvar inference: Text generation, chat completions, and embeddings
|
||||
:cvar safety: Content moderation and safety shields
|
||||
:cvar agents: Agent orchestration and execution
|
||||
:cvar vector_io: Vector database operations and queries
|
||||
:cvar datasetio: Dataset input/output operations
|
||||
:cvar scoring: Model output evaluation and scoring
|
||||
:cvar eval: Model evaluation and benchmarking framework
|
||||
:cvar post_training: Fine-tuning and model training
|
||||
:cvar tool_runtime: Tool execution and management
|
||||
:cvar telemetry: Observability and system monitoring
|
||||
:cvar models: Model metadata and management
|
||||
:cvar shields: Safety shield implementations
|
||||
:cvar vector_dbs: Vector database management
|
||||
:cvar datasets: Dataset creation and management
|
||||
:cvar scoring_functions: Scoring function definitions
|
||||
:cvar benchmarks: Benchmark suite management
|
||||
:cvar tool_groups: Tool group organization
|
||||
:cvar files: File storage and management
|
||||
:cvar inspect: Built-in system inspection and introspection
|
||||
"""
|
||||
|
||||
providers = "providers"
|
||||
inference = "inference"
|
||||
safety = "safety"
|
||||
|
|
|
@ -54,6 +54,9 @@ class ListOpenAIFileResponse(BaseModel):
|
|||
Response for listing files in OpenAI Files API.
|
||||
|
||||
:param data: List of file objects
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
:param first_id: ID of the first file in the list for pagination
|
||||
:param last_id: ID of the last file in the list for pagination
|
||||
:param object: The object type, which is always "list"
|
||||
"""
|
||||
|
||||
|
|
|
@ -41,11 +41,23 @@ from enum import StrEnum
|
|||
|
||||
@json_schema_type
|
||||
class GreedySamplingStrategy(BaseModel):
|
||||
"""Greedy sampling strategy that selects the highest probability token at each step.
|
||||
|
||||
:param type: Must be "greedy" to identify this sampling strategy
|
||||
"""
|
||||
|
||||
type: Literal["greedy"] = "greedy"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TopPSamplingStrategy(BaseModel):
|
||||
"""Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
|
||||
|
||||
:param type: Must be "top_p" to identify this sampling strategy
|
||||
:param temperature: Controls randomness in sampling. Higher values increase randomness
|
||||
:param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95
|
||||
"""
|
||||
|
||||
type: Literal["top_p"] = "top_p"
|
||||
temperature: float | None = Field(..., gt=0.0)
|
||||
top_p: float | None = 0.95
|
||||
|
@ -53,6 +65,12 @@ class TopPSamplingStrategy(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class TopKSamplingStrategy(BaseModel):
|
||||
"""Top-k sampling strategy that restricts sampling to the k most likely tokens.
|
||||
|
||||
:param type: Must be "top_k" to identify this sampling strategy
|
||||
:param top_k: Number of top tokens to consider for sampling. Must be at least 1
|
||||
"""
|
||||
|
||||
type: Literal["top_k"] = "top_k"
|
||||
top_k: int = Field(..., ge=1)
|
||||
|
||||
|
@ -108,11 +126,21 @@ class QuantizationType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class Fp8QuantizationConfig(BaseModel):
|
||||
"""Configuration for 8-bit floating point quantization.
|
||||
|
||||
:param type: Must be "fp8_mixed" to identify this quantization type
|
||||
"""
|
||||
|
||||
type: Literal["fp8_mixed"] = "fp8_mixed"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Bf16QuantizationConfig(BaseModel):
|
||||
"""Configuration for BFloat16 precision (typically no quantization).
|
||||
|
||||
:param type: Must be "bf16" to identify this quantization type
|
||||
"""
|
||||
|
||||
type: Literal["bf16"] = "bf16"
|
||||
|
||||
|
||||
|
@ -202,6 +230,14 @@ register_schema(Message, name="Message")
|
|||
|
||||
@json_schema_type
|
||||
class ToolResponse(BaseModel):
|
||||
"""Response from a tool invocation.
|
||||
|
||||
:param call_id: Unique identifier for the tool call this response is for
|
||||
:param tool_name: Name of the tool that was invoked
|
||||
:param content: The response content from the tool
|
||||
:param metadata: (Optional) Additional metadata about the tool response
|
||||
"""
|
||||
|
||||
call_id: str
|
||||
tool_name: BuiltinTool | str
|
||||
content: InterleavedContent
|
||||
|
@ -439,18 +475,36 @@ class EmbeddingsResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionContentPartTextParam(BaseModel):
|
||||
"""Text content part for OpenAI-compatible chat completion messages.
|
||||
|
||||
:param type: Must be "text" to identify this as text content
|
||||
:param text: The text content of the message
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIImageURL(BaseModel):
|
||||
"""Image URL specification for OpenAI-compatible chat completion messages.
|
||||
|
||||
:param url: URL of the image to include in the message
|
||||
:param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
|
||||
"""
|
||||
|
||||
url: str
|
||||
detail: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionContentPartImageParam(BaseModel):
|
||||
"""Image content part for OpenAI-compatible chat completion messages.
|
||||
|
||||
:param type: Must be "image_url" to identify this as image content
|
||||
:param image_url: Image URL specification and processing details
|
||||
"""
|
||||
|
||||
type: Literal["image_url"] = "image_url"
|
||||
image_url: OpenAIImageURL
|
||||
|
||||
|
@ -495,12 +549,26 @@ class OpenAISystemMessageParam(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionToolCallFunction(BaseModel):
|
||||
"""Function call details for OpenAI-compatible tool calls.
|
||||
|
||||
:param name: (Optional) Name of the function to call
|
||||
:param arguments: (Optional) Arguments to pass to the function as a JSON string
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
arguments: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionToolCall(BaseModel):
|
||||
"""Tool call specification for OpenAI-compatible chat completion responses.
|
||||
|
||||
:param index: (Optional) Index of the tool call in the list
|
||||
:param id: (Optional) Unique identifier for the tool call
|
||||
:param type: Must be "function" to identify this as a function call
|
||||
:param function: (Optional) Function call details
|
||||
"""
|
||||
|
||||
index: int | None = None
|
||||
id: str | None = None
|
||||
type: Literal["function"] = "function"
|
||||
|
@ -564,11 +632,24 @@ register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseFormatText(BaseModel):
|
||||
"""Text response format for OpenAI-compatible chat completion requests.
|
||||
|
||||
:param type: Must be "text" to indicate plain text response format
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIJSONSchema(TypedDict, total=False):
|
||||
"""JSON schema specification for OpenAI-compatible structured response format.
|
||||
|
||||
:param name: Name of the schema
|
||||
:param description: (Optional) Description of the schema
|
||||
:param strict: (Optional) Whether to enforce strict adherence to the schema
|
||||
:param schema: (Optional) The JSON schema definition
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str | None
|
||||
strict: bool | None
|
||||
|
@ -582,12 +663,23 @@ class OpenAIJSONSchema(TypedDict, total=False):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIResponseFormatJSONSchema(BaseModel):
|
||||
"""JSON schema response format for OpenAI-compatible chat completion requests.
|
||||
|
||||
:param type: Must be "json_schema" to indicate structured JSON response format
|
||||
:param json_schema: The JSON schema specification for the response
|
||||
"""
|
||||
|
||||
type: Literal["json_schema"] = "json_schema"
|
||||
json_schema: OpenAIJSONSchema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseFormatJSONObject(BaseModel):
|
||||
"""JSON object response format for OpenAI-compatible chat completion requests.
|
||||
|
||||
:param type: Must be "json_object" to indicate generic JSON object response format
|
||||
"""
|
||||
|
||||
type: Literal["json_object"] = "json_object"
|
||||
|
||||
|
||||
|
@ -846,11 +938,21 @@ class EmbeddingTaskType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class BatchCompletionResponse(BaseModel):
|
||||
"""Response from a batch completion request.
|
||||
|
||||
:param batch: List of completion responses, one for each input in the batch
|
||||
"""
|
||||
|
||||
batch: list[CompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionResponse(BaseModel):
|
||||
"""Response from a batch chat completion request.
|
||||
|
||||
:param batch: List of chat completion responses, one for each conversation in the batch
|
||||
"""
|
||||
|
||||
batch: list[ChatCompletionResponse]
|
||||
|
||||
|
||||
|
@ -860,6 +962,15 @@ class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
|
|||
|
||||
@json_schema_type
|
||||
class ListOpenAIChatCompletionResponse(BaseModel):
|
||||
"""Response from listing OpenAI-compatible chat completions.
|
||||
|
||||
:param data: List of chat completion objects with their input messages
|
||||
:param has_more: Whether there are more completions available beyond this list
|
||||
:param first_id: ID of the first completion in this list
|
||||
:param last_id: ID of the last completion in this list
|
||||
:param object: Must be "list" to identify this as a list response
|
||||
"""
|
||||
|
||||
data: list[OpenAICompletionWithInputMessages]
|
||||
has_more: bool
|
||||
first_id: str
|
||||
|
|
|
@ -14,6 +14,13 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
@json_schema_type
|
||||
class RouteInfo(BaseModel):
|
||||
"""Information about an API route including its path, method, and implementing providers.
|
||||
|
||||
:param route: The API endpoint path
|
||||
:param method: HTTP method for the route
|
||||
:param provider_types: List of provider types that implement this route
|
||||
"""
|
||||
|
||||
route: str
|
||||
method: str
|
||||
provider_types: list[str]
|
||||
|
@ -21,15 +28,30 @@ class RouteInfo(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class HealthInfo(BaseModel):
|
||||
"""Health status information for the service.
|
||||
|
||||
:param status: Current health status of the service
|
||||
"""
|
||||
|
||||
status: HealthStatus
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VersionInfo(BaseModel):
|
||||
"""Version information for the service.
|
||||
|
||||
:param version: Version number of the service
|
||||
"""
|
||||
|
||||
version: str
|
||||
|
||||
|
||||
class ListRoutesResponse(BaseModel):
|
||||
"""Response containing a list of all available API routes.
|
||||
|
||||
:param data: List of available route information objects
|
||||
"""
|
||||
|
||||
data: list[RouteInfo]
|
||||
|
||||
|
||||
|
@ -37,17 +59,17 @@ class ListRoutesResponse(BaseModel):
|
|||
class Inspect(Protocol):
|
||||
@webmethod(route="/inspect/routes", method="GET")
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
"""List all routes.
|
||||
"""List all available API routes with their methods and implementing providers.
|
||||
|
||||
:returns: A ListRoutesResponse.
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/health", method="GET")
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get the health of the service.
|
||||
"""Get the current health status of the service.
|
||||
|
||||
:returns: A HealthInfo.
|
||||
:returns: Health information indicating if the service is operational.
|
||||
"""
|
||||
...
|
||||
|
||||
|
@ -55,6 +77,6 @@ class Inspect(Protocol):
|
|||
async def version(self) -> VersionInfo:
|
||||
"""Get the version of the service.
|
||||
|
||||
:returns: A VersionInfo.
|
||||
:returns: Version information containing the service version number.
|
||||
"""
|
||||
...
|
||||
|
|
|
@ -23,12 +23,27 @@ class CommonModelFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ModelType(StrEnum):
|
||||
"""Enumeration of supported model types in Llama Stack.
|
||||
:cvar llm: Large language model for text generation and completion
|
||||
:cvar embedding: Embedding model for converting text to vector representations
|
||||
"""
|
||||
|
||||
llm = "llm"
|
||||
embedding = "embedding"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Model(CommonModelFields, Resource):
|
||||
"""A model resource representing an AI model registered in Llama Stack.
|
||||
|
||||
:param type: The resource type, always 'model' for model resources
|
||||
:param model_type: The type of model (LLM or embedding model)
|
||||
:param metadata: Any additional metadata for this model
|
||||
:param identifier: Unique identifier for this resource in llama stack
|
||||
:param provider_resource_id: Unique identifier for this resource in the provider
|
||||
:param provider_id: ID of the provider that owns this resource
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.model] = ResourceType.model
|
||||
|
||||
@property
|
||||
|
|
|
@ -18,6 +18,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
|||
|
||||
@json_schema_type
|
||||
class OptimizerType(Enum):
|
||||
"""Available optimizer algorithms for training.
|
||||
:cvar adam: Adaptive Moment Estimation optimizer
|
||||
:cvar adamw: AdamW optimizer with weight decay
|
||||
:cvar sgd: Stochastic Gradient Descent optimizer
|
||||
"""
|
||||
|
||||
adam = "adam"
|
||||
adamw = "adamw"
|
||||
sgd = "sgd"
|
||||
|
@ -25,12 +31,28 @@ class OptimizerType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class DatasetFormat(Enum):
|
||||
"""Format of the training dataset.
|
||||
:cvar instruct: Instruction-following format with prompt and completion
|
||||
:cvar dialog: Multi-turn conversation format with messages
|
||||
"""
|
||||
|
||||
instruct = "instruct"
|
||||
dialog = "dialog"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DataConfig(BaseModel):
|
||||
"""Configuration for training data and data loading.
|
||||
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param batch_size: Number of samples per training batch
|
||||
:param shuffle: Whether to shuffle the dataset during training
|
||||
:param data_format: Format of the dataset (instruct or dialog)
|
||||
:param validation_dataset_id: (Optional) Unique identifier for the validation dataset
|
||||
:param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency
|
||||
:param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
batch_size: int
|
||||
shuffle: bool
|
||||
|
@ -42,6 +64,14 @@ class DataConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OptimizerConfig(BaseModel):
|
||||
"""Configuration parameters for the optimization algorithm.
|
||||
|
||||
:param optimizer_type: Type of optimizer to use (adam, adamw, or sgd)
|
||||
:param lr: Learning rate for the optimizer
|
||||
:param weight_decay: Weight decay coefficient for regularization
|
||||
:param num_warmup_steps: Number of steps for learning rate warmup
|
||||
"""
|
||||
|
||||
optimizer_type: OptimizerType
|
||||
lr: float
|
||||
weight_decay: float
|
||||
|
@ -50,6 +80,14 @@ class OptimizerConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class EfficiencyConfig(BaseModel):
|
||||
"""Configuration for memory and compute efficiency optimizations.
|
||||
|
||||
:param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage
|
||||
:param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory
|
||||
:param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping
|
||||
:param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU
|
||||
"""
|
||||
|
||||
enable_activation_checkpointing: bool | None = False
|
||||
enable_activation_offloading: bool | None = False
|
||||
memory_efficient_fsdp_wrap: bool | None = False
|
||||
|
@ -58,6 +96,18 @@ class EfficiencyConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class TrainingConfig(BaseModel):
|
||||
"""Comprehensive configuration for the training process.
|
||||
|
||||
:param n_epochs: Number of training epochs to run
|
||||
:param max_steps_per_epoch: Maximum number of steps to run per epoch
|
||||
:param gradient_accumulation_steps: Number of steps to accumulate gradients before updating
|
||||
:param max_validation_steps: (Optional) Maximum number of validation steps per epoch
|
||||
:param data_config: (Optional) Configuration for data loading and formatting
|
||||
:param optimizer_config: (Optional) Configuration for the optimization algorithm
|
||||
:param efficiency_config: (Optional) Configuration for memory and compute optimizations
|
||||
:param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32)
|
||||
"""
|
||||
|
||||
n_epochs: int
|
||||
max_steps_per_epoch: int = 1
|
||||
gradient_accumulation_steps: int = 1
|
||||
|
@ -70,6 +120,18 @@ class TrainingConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class LoraFinetuningConfig(BaseModel):
|
||||
"""Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "LoRA"
|
||||
:param lora_attn_modules: List of attention module names to apply LoRA to
|
||||
:param apply_lora_to_mlp: Whether to apply LoRA to MLP layers
|
||||
:param apply_lora_to_output: Whether to apply LoRA to output projection layers
|
||||
:param rank: Rank of the LoRA adaptation (lower rank = fewer parameters)
|
||||
:param alpha: LoRA scaling parameter that controls adaptation strength
|
||||
:param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
|
||||
:param quantize_base: (Optional) Whether to quantize the base model weights
|
||||
"""
|
||||
|
||||
type: Literal["LoRA"] = "LoRA"
|
||||
lora_attn_modules: list[str]
|
||||
apply_lora_to_mlp: bool
|
||||
|
@ -82,6 +144,13 @@ class LoraFinetuningConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class QATFinetuningConfig(BaseModel):
|
||||
"""Configuration for Quantization-Aware Training (QAT) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "QAT"
|
||||
:param quantizer_name: Name of the quantization algorithm to use
|
||||
:param group_size: Size of groups for grouped quantization
|
||||
"""
|
||||
|
||||
type: Literal["QAT"] = "QAT"
|
||||
quantizer_name: str
|
||||
group_size: int
|
||||
|
@ -93,7 +162,11 @@ register_schema(AlgorithmConfig, name="AlgorithmConfig")
|
|||
|
||||
@json_schema_type
|
||||
class PostTrainingJobLogStream(BaseModel):
|
||||
"""Stream of logs from a finetuning job."""
|
||||
"""Stream of logs from a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param log_lines: List of log message strings from the training process
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
log_lines: list[str]
|
||||
|
@ -101,6 +174,10 @@ class PostTrainingJobLogStream(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class RLHFAlgorithm(Enum):
|
||||
"""Available reinforcement learning from human feedback algorithms.
|
||||
:cvar dpo: Direct Preference Optimization algorithm
|
||||
"""
|
||||
|
||||
dpo = "dpo"
|
||||
|
||||
|
||||
|
@ -114,13 +191,39 @@ class DPOLossType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class DPOAlignmentConfig(BaseModel):
|
||||
"""Configuration for Direct Preference Optimization (DPO) alignment.
|
||||
|
||||
:param reward_scale: Scaling factor for the reward signal
|
||||
:param reward_clip: Maximum absolute value for reward clipping
|
||||
:param epsilon: Small value added for numerical stability
|
||||
:param gamma: Discount factor for future rewards
|
||||
:param beta: Temperature parameter for the DPO loss
|
||||
:param loss_type: The type of loss function to use for DPO
|
||||
"""
|
||||
|
||||
reward_scale: float
|
||||
reward_clip: float
|
||||
epsilon: float
|
||||
gamma: float
|
||||
beta: float
|
||||
loss_type: DPOLossType = DPOLossType.sigmoid
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingRLHFRequest(BaseModel):
|
||||
"""Request to finetune a model."""
|
||||
"""Request to finetune a model using reinforcement learning from human feedback.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param finetuned_model: URL or path to the base model to fine-tune
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param validation_dataset_id: Unique identifier for the validation dataset
|
||||
:param algorithm: RLHF algorithm to use for training
|
||||
:param algorithm_config: Configuration parameters for the RLHF algorithm
|
||||
:param optimizer_config: Configuration parameters for the optimization algorithm
|
||||
:param training_config: Configuration parameters for the training process
|
||||
:param hyperparam_search_config: Configuration for hyperparameter search
|
||||
:param logger_config: Configuration for training logging
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
|
||||
|
@ -146,7 +249,16 @@ class PostTrainingJob(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class PostTrainingJobStatusResponse(BaseModel):
|
||||
"""Status of a finetuning job."""
|
||||
"""Status of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param status: Current status of the training job
|
||||
:param scheduled_at: (Optional) Timestamp when the job was scheduled
|
||||
:param started_at: (Optional) Timestamp when the job execution began
|
||||
:param completed_at: (Optional) Timestamp when the job finished, if completed
|
||||
:param resources_allocated: (Optional) Information about computational resources allocated to the job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
status: JobStatus
|
||||
|
@ -166,7 +278,11 @@ class ListPostTrainingJobsResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class PostTrainingJobArtifactsResponse(BaseModel):
|
||||
"""Artifacts of a finetuning job."""
|
||||
"""Artifacts of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
||||
|
|
|
@ -14,6 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
@json_schema_type
|
||||
class ProviderInfo(BaseModel):
|
||||
"""Information about a registered provider including its configuration and health status.
|
||||
|
||||
:param api: The API name this provider implements
|
||||
:param provider_id: Unique identifier for the provider
|
||||
:param provider_type: The type of provider implementation
|
||||
:param config: Configuration parameters for the provider
|
||||
:param health: Current health status of the provider
|
||||
"""
|
||||
|
||||
api: str
|
||||
provider_id: str
|
||||
provider_type: str
|
||||
|
@ -22,6 +31,11 @@ class ProviderInfo(BaseModel):
|
|||
|
||||
|
||||
class ListProvidersResponse(BaseModel):
|
||||
"""Response containing a list of all available providers.
|
||||
|
||||
:param data: List of provider information objects
|
||||
"""
|
||||
|
||||
data: list[ProviderInfo]
|
||||
|
||||
|
||||
|
|
|
@ -17,6 +17,13 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
@json_schema_type
|
||||
class ViolationLevel(Enum):
|
||||
"""Severity level of a safety violation.
|
||||
|
||||
:cvar INFO: Informational level violation that does not require action
|
||||
:cvar WARN: Warning level violation that suggests caution but allows continuation
|
||||
:cvar ERROR: Error level violation that requires blocking or intervention
|
||||
"""
|
||||
|
||||
INFO = "info"
|
||||
WARN = "warn"
|
||||
ERROR = "error"
|
||||
|
@ -24,6 +31,13 @@ class ViolationLevel(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class SafetyViolation(BaseModel):
|
||||
"""Details of a safety violation detected by content moderation.
|
||||
|
||||
:param violation_level: Severity level of the violation
|
||||
:param user_message: (Optional) Message to convey to the user about the violation
|
||||
:param metadata: Additional metadata including specific violation codes for debugging and telemetry
|
||||
"""
|
||||
|
||||
violation_level: ViolationLevel
|
||||
|
||||
# what message should you convey to the user
|
||||
|
@ -36,6 +50,11 @@ class SafetyViolation(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class RunShieldResponse(BaseModel):
|
||||
"""Response from running a safety shield.
|
||||
|
||||
:param violation: (Optional) Safety violation detected by the shield, if any
|
||||
"""
|
||||
|
||||
violation: SafetyViolation | None = None
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,12 @@ class ScoringResult(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ScoreBatchResponse(BaseModel):
|
||||
"""Response from batch scoring operations on datasets.
|
||||
|
||||
:param dataset_id: (Optional) The identifier of the dataset that was scored
|
||||
:param results: A map of scoring function name to ScoringResult
|
||||
"""
|
||||
|
||||
dataset_id: str | None = None
|
||||
results: dict[str, ScoringResult]
|
||||
|
||||
|
|
|
@ -25,6 +25,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
|||
# with standard metrics so they can be rolled up?
|
||||
@json_schema_type
|
||||
class ScoringFnParamsType(StrEnum):
|
||||
"""Types of scoring function parameter configurations.
|
||||
:cvar llm_as_judge: Use an LLM model to evaluate and score responses
|
||||
:cvar regex_parser: Use regex patterns to extract and score specific parts of responses
|
||||
:cvar basic: Basic scoring with simple aggregation functions
|
||||
"""
|
||||
|
||||
llm_as_judge = "llm_as_judge"
|
||||
regex_parser = "regex_parser"
|
||||
basic = "basic"
|
||||
|
@ -32,6 +38,14 @@ class ScoringFnParamsType(StrEnum):
|
|||
|
||||
@json_schema_type
|
||||
class AggregationFunctionType(StrEnum):
|
||||
"""Types of aggregation functions for scoring results.
|
||||
:cvar average: Calculate the arithmetic mean of scores
|
||||
:cvar weighted_average: Calculate a weighted average of scores
|
||||
:cvar median: Calculate the median value of scores
|
||||
:cvar categorical_count: Count occurrences of categorical values
|
||||
:cvar accuracy: Calculate accuracy as the proportion of correct answers
|
||||
"""
|
||||
|
||||
average = "average"
|
||||
weighted_average = "weighted_average"
|
||||
median = "median"
|
||||
|
@ -41,6 +55,14 @@ class AggregationFunctionType(StrEnum):
|
|||
|
||||
@json_schema_type
|
||||
class LLMAsJudgeScoringFnParams(BaseModel):
|
||||
"""Parameters for LLM-as-judge scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always llm_as_judge
|
||||
:param judge_model: Identifier of the LLM model to use as a judge for scoring
|
||||
:param prompt_template: (Optional) Custom prompt template for the judge model
|
||||
:param judge_score_regexes: Regexes to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
|
||||
judge_model: str
|
||||
prompt_template: str | None = None
|
||||
|
@ -56,6 +78,12 @@ class LLMAsJudgeScoringFnParams(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class RegexParserScoringFnParams(BaseModel):
|
||||
"""Parameters for regex parser scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always regex_parser
|
||||
:param parsing_regexes: Regex to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
|
||||
parsing_regexes: list[str] = Field(
|
||||
description="Regex to extract the answer from generated response",
|
||||
|
@ -69,6 +97,11 @@ class RegexParserScoringFnParams(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class BasicScoringFnParams(BaseModel):
|
||||
"""Parameters for basic scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always basic
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
|
@ -100,6 +133,10 @@ class CommonScoringFnFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ScoringFn(CommonScoringFnFields, Resource):
|
||||
"""A scoring function resource for evaluating model outputs.
|
||||
:param type: The resource type, always scoring_function
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
|
||||
|
||||
@property
|
||||
|
|
|
@ -19,7 +19,11 @@ class CommonShieldFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Shield(CommonShieldFields, Resource):
|
||||
"""A safety shield resource that can be used to check content"""
|
||||
"""A safety shield resource that can be used to check content.
|
||||
|
||||
:param params: (Optional) Configuration parameters for the shield
|
||||
:param type: The resource type, always shield
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.shield] = ResourceType.shield
|
||||
|
||||
|
|
|
@ -14,7 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
|
||||
class FilteringFunction(Enum):
|
||||
"""The type of filtering function."""
|
||||
"""The type of filtering function.
|
||||
|
||||
:cvar none: No filtering applied, accept all generated synthetic data
|
||||
:cvar random: Random sampling of generated data points
|
||||
:cvar top_k: Keep only the top-k highest scoring synthetic data samples
|
||||
:cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold
|
||||
:cvar top_k_top_p: Combined top-k and top-p filtering strategy
|
||||
:cvar sigmoid: Apply sigmoid function for probability-based filtering
|
||||
"""
|
||||
|
||||
none = "none"
|
||||
random = "random"
|
||||
|
@ -26,7 +34,12 @@ class FilteringFunction(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class SyntheticDataGenerationRequest(BaseModel):
|
||||
"""Request to generate synthetic data. A small batch of prompts and a filtering function"""
|
||||
"""Request to generate synthetic data. A small batch of prompts and a filtering function
|
||||
|
||||
:param dialogs: List of conversation messages to use as input for synthetic data generation
|
||||
:param filtering_function: Type of filtering to apply to generated synthetic data samples
|
||||
:param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
|
||||
"""
|
||||
|
||||
dialogs: list[Message]
|
||||
filtering_function: FilteringFunction = FilteringFunction.none
|
||||
|
@ -35,7 +48,11 @@ class SyntheticDataGenerationRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class SyntheticDataGenerationResponse(BaseModel):
|
||||
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
|
||||
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.
|
||||
|
||||
:param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
|
||||
:param statistics: (Optional) Statistical information about the generation process and filtering results
|
||||
"""
|
||||
|
||||
synthetic_data: list[dict[str, Any]]
|
||||
statistics: dict[str, Any] | None = None
|
||||
|
@ -48,4 +65,12 @@ class SyntheticDataGeneration(Protocol):
|
|||
dialogs: list[Message],
|
||||
filtering_function: FilteringFunction = FilteringFunction.none,
|
||||
model: str | None = None,
|
||||
) -> SyntheticDataGenerationResponse: ...
|
||||
) -> SyntheticDataGenerationResponse:
|
||||
"""Generate synthetic data based on input dialogs and apply filtering.
|
||||
|
||||
:param dialogs: List of conversation messages to use as input for synthetic data generation
|
||||
:param filtering_function: Type of filtering to apply to generated synthetic data samples
|
||||
:param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
|
||||
:returns: Response containing filtered synthetic data samples and optional statistics
|
||||
"""
|
||||
...
|
||||
|
|
|
@ -25,12 +25,27 @@ DEFAULT_TTL_DAYS = 7
|
|||
|
||||
@json_schema_type
|
||||
class SpanStatus(Enum):
|
||||
"""The status of a span indicating whether it completed successfully or with an error.
|
||||
:cvar OK: Span completed successfully without errors
|
||||
:cvar ERROR: Span completed with an error or failure
|
||||
"""
|
||||
|
||||
OK = "ok"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Span(BaseModel):
|
||||
"""A span representing a single operation within a trace.
|
||||
:param span_id: Unique identifier for the span
|
||||
:param trace_id: Unique identifier for the trace this span belongs to
|
||||
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
||||
:param name: Human-readable name describing the operation this span represents
|
||||
:param start_time: Timestamp when the operation began
|
||||
:param end_time: (Optional) Timestamp when the operation finished, if completed
|
||||
:param attributes: (Optional) Key-value pairs containing additional metadata about the span
|
||||
"""
|
||||
|
||||
span_id: str
|
||||
trace_id: str
|
||||
parent_span_id: str | None = None
|
||||
|
@ -47,6 +62,13 @@ class Span(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Trace(BaseModel):
|
||||
"""A trace representing the complete execution path of a request across multiple operations.
|
||||
:param trace_id: Unique identifier for the trace
|
||||
:param root_span_id: Unique identifier for the root span that started this trace
|
||||
:param start_time: Timestamp when the trace began
|
||||
:param end_time: (Optional) Timestamp when the trace finished, if completed
|
||||
"""
|
||||
|
||||
trace_id: str
|
||||
root_span_id: str
|
||||
start_time: datetime
|
||||
|
@ -55,6 +77,12 @@ class Trace(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class EventType(Enum):
|
||||
"""The type of telemetry event being logged.
|
||||
:cvar UNSTRUCTURED_LOG: A simple log message with severity level
|
||||
:cvar STRUCTURED_LOG: A structured log event with typed payload data
|
||||
:cvar METRIC: A metric measurement with value and unit
|
||||
"""
|
||||
|
||||
UNSTRUCTURED_LOG = "unstructured_log"
|
||||
STRUCTURED_LOG = "structured_log"
|
||||
METRIC = "metric"
|
||||
|
@ -62,6 +90,15 @@ class EventType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class LogSeverity(Enum):
|
||||
"""The severity level of a log message.
|
||||
:cvar VERBOSE: Detailed diagnostic information for troubleshooting
|
||||
:cvar DEBUG: Debug information useful during development
|
||||
:cvar INFO: General informational messages about normal operation
|
||||
:cvar WARN: Warning messages about potentially problematic situations
|
||||
:cvar ERROR: Error messages indicating failures that don't stop execution
|
||||
:cvar CRITICAL: Critical error messages indicating severe failures
|
||||
"""
|
||||
|
||||
VERBOSE = "verbose"
|
||||
DEBUG = "debug"
|
||||
INFO = "info"
|
||||
|
@ -71,6 +108,13 @@ class LogSeverity(Enum):
|
|||
|
||||
|
||||
class EventCommon(BaseModel):
|
||||
"""Common fields shared by all telemetry events.
|
||||
:param trace_id: Unique identifier for the trace this event belongs to
|
||||
:param span_id: Unique identifier for the span this event belongs to
|
||||
:param timestamp: Timestamp when the event occurred
|
||||
:param attributes: (Optional) Key-value pairs containing additional metadata about the event
|
||||
"""
|
||||
|
||||
trace_id: str
|
||||
span_id: str
|
||||
timestamp: datetime
|
||||
|
@ -79,6 +123,12 @@ class EventCommon(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class UnstructuredLogEvent(EventCommon):
|
||||
"""An unstructured log event containing a simple text message.
|
||||
:param type: Event type identifier set to UNSTRUCTURED_LOG
|
||||
:param message: The log message text
|
||||
:param severity: The severity level of the log message
|
||||
"""
|
||||
|
||||
type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
|
||||
message: str
|
||||
severity: LogSeverity
|
||||
|
@ -86,6 +136,13 @@ class UnstructuredLogEvent(EventCommon):
|
|||
|
||||
@json_schema_type
|
||||
class MetricEvent(EventCommon):
|
||||
"""A metric event containing a measured value.
|
||||
:param type: Event type identifier set to METRIC
|
||||
:param metric: The name of the metric being measured
|
||||
:param value: The numeric value of the metric measurement
|
||||
:param unit: The unit of measurement for the metric value
|
||||
"""
|
||||
|
||||
type: Literal[EventType.METRIC] = EventType.METRIC
|
||||
metric: str # this would be an enum
|
||||
value: int | float
|
||||
|
@ -94,6 +151,12 @@ class MetricEvent(EventCommon):
|
|||
|
||||
@json_schema_type
|
||||
class MetricInResponse(BaseModel):
|
||||
"""A metric value included in API responses.
|
||||
:param metric: The name of the metric
|
||||
:param value: The numeric value of the metric
|
||||
:param unit: (Optional) The unit of measurement for the metric value
|
||||
"""
|
||||
|
||||
metric: str
|
||||
value: int | float
|
||||
unit: str | None = None
|
||||
|
@ -120,17 +183,32 @@ class MetricInResponse(BaseModel):
|
|||
|
||||
|
||||
class MetricResponseMixin(BaseModel):
|
||||
"""Mixin class for API responses that can include metrics.
|
||||
:param metrics: (Optional) List of metrics associated with the API response
|
||||
"""
|
||||
|
||||
metrics: list[MetricInResponse] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class StructuredLogType(Enum):
|
||||
"""The type of structured log event payload.
|
||||
:cvar SPAN_START: Event indicating the start of a new span
|
||||
:cvar SPAN_END: Event indicating the completion of a span
|
||||
"""
|
||||
|
||||
SPAN_START = "span_start"
|
||||
SPAN_END = "span_end"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class SpanStartPayload(BaseModel):
|
||||
"""Payload for a span start event.
|
||||
:param type: Payload type identifier set to SPAN_START
|
||||
:param name: Human-readable name describing the operation this span represents
|
||||
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
||||
"""
|
||||
|
||||
type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
|
||||
name: str
|
||||
parent_span_id: str | None = None
|
||||
|
@ -138,6 +216,11 @@ class SpanStartPayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class SpanEndPayload(BaseModel):
|
||||
"""Payload for a span end event.
|
||||
:param type: Payload type identifier set to SPAN_END
|
||||
:param status: The final status of the span indicating success or failure
|
||||
"""
|
||||
|
||||
type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
|
||||
status: SpanStatus
|
||||
|
||||
|
@ -151,6 +234,11 @@ register_schema(StructuredLogPayload, name="StructuredLogPayload")
|
|||
|
||||
@json_schema_type
|
||||
class StructuredLogEvent(EventCommon):
|
||||
"""A structured log event containing typed payload data.
|
||||
:param type: Event type identifier set to STRUCTURED_LOG
|
||||
:param payload: The structured payload data for the log event
|
||||
"""
|
||||
|
||||
type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
|
||||
payload: StructuredLogPayload
|
||||
|
||||
|
@ -164,6 +252,14 @@ register_schema(Event, name="Event")
|
|||
|
||||
@json_schema_type
|
||||
class EvalTrace(BaseModel):
|
||||
"""A trace record for evaluation purposes.
|
||||
:param session_id: Unique identifier for the evaluation session
|
||||
:param step: The evaluation step or phase identifier
|
||||
:param input: The input data for the evaluation
|
||||
:param output: The actual output produced during evaluation
|
||||
:param expected_output: The expected output for comparison during evaluation
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
step: str
|
||||
input: str
|
||||
|
@ -173,11 +269,22 @@ class EvalTrace(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class SpanWithStatus(Span):
|
||||
"""A span that includes status information.
|
||||
:param status: (Optional) The current status of the span
|
||||
"""
|
||||
|
||||
status: SpanStatus | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QueryConditionOp(Enum):
|
||||
"""Comparison operators for query conditions.
|
||||
:cvar EQ: Equal to comparison
|
||||
:cvar NE: Not equal to comparison
|
||||
:cvar GT: Greater than comparison
|
||||
:cvar LT: Less than comparison
|
||||
"""
|
||||
|
||||
EQ = "eq"
|
||||
NE = "ne"
|
||||
GT = "gt"
|
||||
|
@ -186,29 +293,59 @@ class QueryConditionOp(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class QueryCondition(BaseModel):
|
||||
"""A condition for filtering query results.
|
||||
:param key: The attribute key to filter on
|
||||
:param op: The comparison operator to apply
|
||||
:param value: The value to compare against
|
||||
"""
|
||||
|
||||
key: str
|
||||
op: QueryConditionOp
|
||||
value: Any
|
||||
|
||||
|
||||
class QueryTracesResponse(BaseModel):
|
||||
"""Response containing a list of traces.
|
||||
:param data: List of traces matching the query criteria
|
||||
"""
|
||||
|
||||
data: list[Trace]
|
||||
|
||||
|
||||
class QuerySpansResponse(BaseModel):
|
||||
"""Response containing a list of spans.
|
||||
:param data: List of spans matching the query criteria
|
||||
"""
|
||||
|
||||
data: list[Span]
|
||||
|
||||
|
||||
class QuerySpanTreeResponse(BaseModel):
|
||||
"""Response containing a tree structure of spans.
|
||||
:param data: Dictionary mapping span IDs to spans with status information
|
||||
"""
|
||||
|
||||
data: dict[str, SpanWithStatus]
|
||||
|
||||
|
||||
class MetricQueryType(Enum):
|
||||
"""The type of metric query to perform.
|
||||
:cvar RANGE: Query metrics over a time range
|
||||
:cvar INSTANT: Query metrics at a specific point in time
|
||||
"""
|
||||
|
||||
RANGE = "range"
|
||||
INSTANT = "instant"
|
||||
|
||||
|
||||
class MetricLabelOperator(Enum):
|
||||
"""Operators for matching metric labels.
|
||||
:cvar EQUALS: Label value must equal the specified value
|
||||
:cvar NOT_EQUALS: Label value must not equal the specified value
|
||||
:cvar REGEX_MATCH: Label value must match the specified regular expression
|
||||
:cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
|
||||
"""
|
||||
|
||||
EQUALS = "="
|
||||
NOT_EQUALS = "!="
|
||||
REGEX_MATCH = "=~"
|
||||
|
@ -216,6 +353,12 @@ class MetricLabelOperator(Enum):
|
|||
|
||||
|
||||
class MetricLabelMatcher(BaseModel):
|
||||
"""A matcher for filtering metrics by label values.
|
||||
:param name: The name of the label to match
|
||||
:param value: The value to match against
|
||||
:param operator: The comparison operator to use for matching
|
||||
"""
|
||||
|
||||
name: str
|
||||
value: str
|
||||
operator: MetricLabelOperator = MetricLabelOperator.EQUALS
|
||||
|
@ -223,24 +366,44 @@ class MetricLabelMatcher(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class MetricLabel(BaseModel):
|
||||
"""A label associated with a metric.
|
||||
:param name: The name of the label
|
||||
:param value: The value of the label
|
||||
"""
|
||||
|
||||
name: str
|
||||
value: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricDataPoint(BaseModel):
|
||||
"""A single data point in a metric time series.
|
||||
:param timestamp: Unix timestamp when the metric value was recorded
|
||||
:param value: The numeric value of the metric at this timestamp
|
||||
"""
|
||||
|
||||
timestamp: int
|
||||
value: float
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricSeries(BaseModel):
|
||||
"""A time series of metric data points.
|
||||
:param metric: The name of the metric
|
||||
:param labels: List of labels associated with this metric series
|
||||
:param values: List of data points in chronological order
|
||||
"""
|
||||
|
||||
metric: str
|
||||
labels: list[MetricLabel]
|
||||
values: list[MetricDataPoint]
|
||||
|
||||
|
||||
class QueryMetricsResponse(BaseModel):
|
||||
"""Response containing metric time series data.
|
||||
:param data: List of metric series matching the query criteria
|
||||
"""
|
||||
|
||||
data: list[MetricSeries]
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ class RRFRanker(BaseModel):
|
|||
|
||||
:param type: The type of ranker, always "rrf"
|
||||
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
|
||||
Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
|
||||
Must be greater than 0
|
||||
"""
|
||||
|
||||
type: Literal["rrf"] = "rrf"
|
||||
|
@ -76,12 +76,25 @@ class RAGDocument(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class RAGQueryResult(BaseModel):
|
||||
"""Result of a RAG query containing retrieved content and metadata.
|
||||
|
||||
:param content: (Optional) The retrieved content from the query
|
||||
:param metadata: Additional metadata about the query result
|
||||
"""
|
||||
|
||||
content: InterleavedContent | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGQueryGenerator(Enum):
|
||||
"""Types of query generators for RAG systems.
|
||||
|
||||
:cvar default: Default query generator using simple text processing
|
||||
:cvar llm: LLM-based query generator for enhanced query understanding
|
||||
:cvar custom: Custom query generator implementation
|
||||
"""
|
||||
|
||||
default = "default"
|
||||
llm = "llm"
|
||||
custom = "custom"
|
||||
|
@ -103,12 +116,25 @@ class RAGSearchMode(StrEnum):
|
|||
|
||||
@json_schema_type
|
||||
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the default RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'default'
|
||||
:param separator: String separator used to join query terms
|
||||
"""
|
||||
|
||||
type: Literal["default"] = "default"
|
||||
separator: str = " "
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LLMRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the LLM-based RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'llm'
|
||||
:param model: Name of the language model to use for query generation
|
||||
:param template: Template string for formatting the query generation prompt
|
||||
"""
|
||||
|
||||
type: Literal["llm"] = "llm"
|
||||
model: str
|
||||
template: str
|
||||
|
@ -166,7 +192,12 @@ class RAGToolRuntime(Protocol):
|
|||
vector_db_id: str,
|
||||
chunk_size_in_tokens: int = 512,
|
||||
) -> None:
|
||||
"""Index documents so they can be used by the RAG system"""
|
||||
"""Index documents so they can be used by the RAG system.
|
||||
|
||||
:param documents: List of documents to index in the RAG system
|
||||
:param vector_db_id: ID of the vector database to store the document embeddings
|
||||
:param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
|
||||
|
@ -176,5 +207,11 @@ class RAGToolRuntime(Protocol):
|
|||
vector_db_ids: list[str],
|
||||
query_config: RAGQueryConfig | None = None,
|
||||
) -> RAGQueryResult:
|
||||
"""Query the RAG system for context; typically invoked by the agent"""
|
||||
"""Query the RAG system for context; typically invoked by the agent.
|
||||
|
||||
:param content: The query content to search for in the indexed documents
|
||||
:param vector_db_ids: List of vector database IDs to search within
|
||||
:param query_config: (Optional) Configuration parameters for the query operation
|
||||
:returns: RAGQueryResult containing the retrieved content and metadata
|
||||
"""
|
||||
...
|
||||
|
|
|
@ -20,6 +20,15 @@ from .rag_tool import RAGToolRuntime
|
|||
|
||||
@json_schema_type
|
||||
class ToolParameter(BaseModel):
|
||||
"""Parameter definition for a tool.
|
||||
|
||||
:param name: Name of the parameter
|
||||
:param parameter_type: Type of the parameter (e.g., string, integer)
|
||||
:param description: Human-readable description of what the parameter does
|
||||
:param required: Whether this parameter is required for tool invocation
|
||||
:param default: (Optional) Default value for the parameter if not provided
|
||||
"""
|
||||
|
||||
name: str
|
||||
parameter_type: str
|
||||
description: str
|
||||
|
@ -29,6 +38,15 @@ class ToolParameter(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Tool(Resource):
|
||||
"""A tool that can be invoked by agents.
|
||||
|
||||
:param type: Type of resource, always 'tool'
|
||||
:param toolgroup_id: ID of the tool group this tool belongs to
|
||||
:param description: Human-readable description of what the tool does
|
||||
:param parameters: List of parameters this tool accepts
|
||||
:param metadata: (Optional) Additional metadata about the tool
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.tool] = ResourceType.tool
|
||||
toolgroup_id: str
|
||||
description: str
|
||||
|
@ -38,6 +56,14 @@ class Tool(Resource):
|
|||
|
||||
@json_schema_type
|
||||
class ToolDef(BaseModel):
|
||||
"""Tool definition used in runtime contexts.
|
||||
|
||||
:param name: Name of the tool
|
||||
:param description: (Optional) Human-readable description of what the tool does
|
||||
:param parameters: (Optional) List of parameters this tool accepts
|
||||
:param metadata: (Optional) Additional metadata about the tool
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str | None = None
|
||||
parameters: list[ToolParameter] | None = None
|
||||
|
@ -46,6 +72,14 @@ class ToolDef(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ToolGroupInput(BaseModel):
|
||||
"""Input data for registering a tool group.
|
||||
|
||||
:param toolgroup_id: Unique identifier for the tool group
|
||||
:param provider_id: ID of the provider that will handle this tool group
|
||||
:param args: (Optional) Additional arguments to pass to the provider
|
||||
:param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
|
||||
"""
|
||||
|
||||
toolgroup_id: str
|
||||
provider_id: str
|
||||
args: dict[str, Any] | None = None
|
||||
|
@ -54,6 +88,13 @@ class ToolGroupInput(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ToolGroup(Resource):
|
||||
"""A group of related tools managed together.
|
||||
|
||||
:param type: Type of resource, always 'tool_group'
|
||||
:param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
|
||||
:param args: (Optional) Additional arguments for the tool group
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.tool_group] = ResourceType.tool_group
|
||||
mcp_endpoint: URL | None = None
|
||||
args: dict[str, Any] | None = None
|
||||
|
@ -61,6 +102,14 @@ class ToolGroup(Resource):
|
|||
|
||||
@json_schema_type
|
||||
class ToolInvocationResult(BaseModel):
|
||||
"""Result of a tool invocation.
|
||||
|
||||
:param content: (Optional) The output content from the tool execution
|
||||
:param error_message: (Optional) Error message if the tool execution failed
|
||||
:param error_code: (Optional) Numeric error code if the tool execution failed
|
||||
:param metadata: (Optional) Additional metadata about the tool execution
|
||||
"""
|
||||
|
||||
content: InterleavedContent | None = None
|
||||
error_message: str | None = None
|
||||
error_code: int | None = None
|
||||
|
@ -73,14 +122,29 @@ class ToolStore(Protocol):
|
|||
|
||||
|
||||
class ListToolGroupsResponse(BaseModel):
|
||||
"""Response containing a list of tool groups.
|
||||
|
||||
:param data: List of tool groups
|
||||
"""
|
||||
|
||||
data: list[ToolGroup]
|
||||
|
||||
|
||||
class ListToolsResponse(BaseModel):
|
||||
"""Response containing a list of tools.
|
||||
|
||||
:param data: List of tools
|
||||
"""
|
||||
|
||||
data: list[Tool]
|
||||
|
||||
|
||||
class ListToolDefsResponse(BaseModel):
|
||||
"""Response containing a list of tool definitions.
|
||||
|
||||
:param data: List of tool definitions
|
||||
"""
|
||||
|
||||
data: list[ToolDef]
|
||||
|
||||
|
||||
|
@ -158,6 +222,11 @@ class ToolGroups(Protocol):
|
|||
|
||||
|
||||
class SpecialToolGroup(Enum):
|
||||
"""Special tool groups with predefined functionality.
|
||||
|
||||
:cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval
|
||||
"""
|
||||
|
||||
rag_tool = "rag_tool"
|
||||
|
||||
|
||||
|
|
|
@ -15,6 +15,13 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
@json_schema_type
|
||||
class VectorDB(Resource):
|
||||
"""Vector database resource for storing and querying vector embeddings.
|
||||
|
||||
:param type: Type of resource, always 'vector_db' for vector databases
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.vector_db] = ResourceType.vector_db
|
||||
|
||||
embedding_model: str
|
||||
|
@ -31,6 +38,14 @@ class VectorDB(Resource):
|
|||
|
||||
|
||||
class VectorDBInput(BaseModel):
|
||||
"""Input parameters for creating or configuring a vector database.
|
||||
|
||||
:param vector_db_id: Unique identifier for the vector database
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
|
||||
"""
|
||||
|
||||
vector_db_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
|
@ -39,6 +54,11 @@ class VectorDBInput(BaseModel):
|
|||
|
||||
|
||||
class ListVectorDBsResponse(BaseModel):
|
||||
"""Response from listing vector databases.
|
||||
|
||||
:param data: List of vector databases
|
||||
"""
|
||||
|
||||
data: list[VectorDB]
|
||||
|
||||
|
||||
|
|
|
@ -94,12 +94,27 @@ class Chunk(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class QueryChunksResponse(BaseModel):
|
||||
"""Response from querying chunks in a vector database.
|
||||
|
||||
:param chunks: List of content chunks returned from the query
|
||||
:param scores: Relevance scores corresponding to each returned chunk
|
||||
"""
|
||||
|
||||
chunks: list[Chunk]
|
||||
scores: list[float]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileCounts(BaseModel):
|
||||
"""File processing status counts for a vector store.
|
||||
|
||||
:param completed: Number of files that have been successfully processed
|
||||
:param cancelled: Number of files that had their processing cancelled
|
||||
:param failed: Number of files that failed to process
|
||||
:param in_progress: Number of files currently being processed
|
||||
:param total: Total number of files in the vector store
|
||||
"""
|
||||
|
||||
completed: int
|
||||
cancelled: int
|
||||
failed: int
|
||||
|
@ -109,7 +124,20 @@ class VectorStoreFileCounts(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreObject(BaseModel):
|
||||
"""OpenAI Vector Store object."""
|
||||
"""OpenAI Vector Store object.
|
||||
|
||||
:param id: Unique identifier for the vector store
|
||||
:param object: Object type identifier, always "vector_store"
|
||||
:param created_at: Timestamp when the vector store was created
|
||||
:param name: (Optional) Name of the vector store
|
||||
:param usage_bytes: Storage space used by the vector store in bytes
|
||||
:param file_counts: File processing status counts for the vector store
|
||||
:param status: Current status of the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param expires_at: (Optional) Timestamp when the vector store will expire
|
||||
:param last_active_at: (Optional) Timestamp of last activity on the vector store
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store"
|
||||
|
@ -126,7 +154,14 @@ class VectorStoreObject(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreCreateRequest(BaseModel):
|
||||
"""Request to create a vector store."""
|
||||
"""Request to create a vector store.
|
||||
|
||||
:param name: (Optional) Name for the vector store
|
||||
:param file_ids: List of file IDs to include in the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param chunking_strategy: (Optional) Strategy for splitting files into chunks
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
file_ids: list[str] = Field(default_factory=list)
|
||||
|
@ -137,7 +172,12 @@ class VectorStoreCreateRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreModifyRequest(BaseModel):
|
||||
"""Request to modify a vector store."""
|
||||
"""Request to modify a vector store.
|
||||
|
||||
:param name: (Optional) Updated name for the vector store
|
||||
:param expires_after: (Optional) Updated expiration policy for the vector store
|
||||
:param metadata: (Optional) Updated set of key-value pairs for the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
expires_after: dict[str, Any] | None = None
|
||||
|
@ -146,7 +186,14 @@ class VectorStoreModifyRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreListResponse(BaseModel):
|
||||
"""Response from listing vector stores."""
|
||||
"""Response from listing vector stores.
|
||||
|
||||
:param object: Object type identifier, always "list"
|
||||
:param data: List of vector store objects
|
||||
:param first_id: (Optional) ID of the first vector store in the list for pagination
|
||||
:param last_id: (Optional) ID of the last vector store in the list for pagination
|
||||
:param has_more: Whether there are more vector stores available beyond this page
|
||||
"""
|
||||
|
||||
object: str = "list"
|
||||
data: list[VectorStoreObject]
|
||||
|
@ -157,7 +204,14 @@ class VectorStoreListResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchRequest(BaseModel):
|
||||
"""Request to search a vector store."""
|
||||
"""Request to search a vector store.
|
||||
|
||||
:param query: Search query as a string or list of strings
|
||||
:param filters: (Optional) Filters based on file attributes to narrow search results
|
||||
:param max_num_results: Maximum number of results to return, defaults to 10
|
||||
:param ranking_options: (Optional) Options for ranking and filtering search results
|
||||
:param rewrite_query: Whether to rewrite the query for better vector search performance
|
||||
"""
|
||||
|
||||
query: str | list[str]
|
||||
filters: dict[str, Any] | None = None
|
||||
|
@ -168,13 +222,26 @@ class VectorStoreSearchRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreContent(BaseModel):
|
||||
"""Content item from a vector store file or search result.
|
||||
|
||||
:param type: Content type, currently only "text" is supported
|
||||
:param text: The actual text content
|
||||
"""
|
||||
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchResponse(BaseModel):
|
||||
"""Response from searching a vector store."""
|
||||
"""Response from searching a vector store.
|
||||
|
||||
:param file_id: Unique identifier of the file containing the result
|
||||
:param filename: Name of the file containing the result
|
||||
:param score: Relevance score for this search result
|
||||
:param attributes: (Optional) Key-value attributes associated with the file
|
||||
:param content: List of content items matching the search query
|
||||
"""
|
||||
|
||||
file_id: str
|
||||
filename: str
|
||||
|
@ -185,7 +252,14 @@ class VectorStoreSearchResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchResponsePage(BaseModel):
|
||||
"""Response from searching a vector store."""
|
||||
"""Paginated response from searching a vector store.
|
||||
|
||||
:param object: Object type identifier for the search results page
|
||||
:param search_query: The original search query that was executed
|
||||
:param data: List of search result objects
|
||||
:param has_more: Whether there are more results available beyond this page
|
||||
:param next_page: (Optional) Token for retrieving the next page of results
|
||||
"""
|
||||
|
||||
object: str = "vector_store.search_results.page"
|
||||
search_query: str
|
||||
|
@ -196,7 +270,12 @@ class VectorStoreSearchResponsePage(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreDeleteResponse(BaseModel):
|
||||
"""Response from deleting a vector store."""
|
||||
"""Response from deleting a vector store.
|
||||
|
||||
:param id: Unique identifier of the deleted vector store
|
||||
:param object: Object type identifier for the deletion response
|
||||
:param deleted: Whether the deletion operation was successful
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.deleted"
|
||||
|
@ -205,17 +284,34 @@ class VectorStoreDeleteResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyAuto(BaseModel):
|
||||
"""Automatic chunking strategy for vector store files.
|
||||
|
||||
:param type: Strategy type, always "auto" for automatic chunking
|
||||
"""
|
||||
|
||||
type: Literal["auto"] = "auto"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyStaticConfig(BaseModel):
|
||||
"""Configuration for static chunking strategy.
|
||||
|
||||
:param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks
|
||||
:param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096
|
||||
"""
|
||||
|
||||
chunk_overlap_tokens: int = 400
|
||||
max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyStatic(BaseModel):
|
||||
"""Static chunking strategy with configurable parameters.
|
||||
|
||||
:param type: Strategy type, always "static" for static chunking
|
||||
:param static: Configuration parameters for the static chunking strategy
|
||||
"""
|
||||
|
||||
type: Literal["static"] = "static"
|
||||
static: VectorStoreChunkingStrategyStaticConfig
|
||||
|
||||
|
@ -227,6 +323,12 @@ register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
|
|||
|
||||
|
||||
class SearchRankingOptions(BaseModel):
|
||||
"""Options for ranking and filtering search results.
|
||||
|
||||
:param ranker: (Optional) Name of the ranking algorithm to use
|
||||
:param score_threshold: (Optional) Minimum relevance score threshold for results
|
||||
"""
|
||||
|
||||
ranker: str | None = None
|
||||
# NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however
|
||||
# we don't guarantee that the score is between 0 and 1, so will leave this unconstrained
|
||||
|
@ -236,6 +338,12 @@ class SearchRankingOptions(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreFileLastError(BaseModel):
|
||||
"""Error information for failed vector store file processing.
|
||||
|
||||
:param code: Error code indicating the type of failure
|
||||
:param message: Human-readable error message describing the failure
|
||||
"""
|
||||
|
||||
code: Literal["server_error"] | Literal["rate_limit_exceeded"]
|
||||
message: str
|
||||
|
||||
|
@ -246,7 +354,18 @@ register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreFileObject(BaseModel):
|
||||
"""OpenAI Vector Store File object."""
|
||||
"""OpenAI Vector Store File object.
|
||||
|
||||
:param id: Unique identifier for the file
|
||||
:param object: Object type identifier, always "vector_store.file"
|
||||
:param attributes: Key-value attributes associated with the file
|
||||
:param chunking_strategy: Strategy used for splitting the file into chunks
|
||||
:param created_at: Timestamp when the file was added to the vector store
|
||||
:param last_error: (Optional) Error information if file processing failed
|
||||
:param status: Current processing status of the file
|
||||
:param usage_bytes: Storage space used by this file in bytes
|
||||
:param vector_store_id: ID of the vector store containing this file
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.file"
|
||||
|
@ -261,7 +380,14 @@ class VectorStoreFileObject(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreListFilesResponse(BaseModel):
|
||||
"""Response from listing vector stores."""
|
||||
"""Response from listing files in a vector store.
|
||||
|
||||
:param object: Object type identifier, always "list"
|
||||
:param data: List of vector store file objects
|
||||
:param first_id: (Optional) ID of the first file in the list for pagination
|
||||
:param last_id: (Optional) ID of the last file in the list for pagination
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
"""
|
||||
|
||||
object: str = "list"
|
||||
data: list[VectorStoreFileObject]
|
||||
|
@ -272,7 +398,13 @@ class VectorStoreListFilesResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreFileContentsResponse(BaseModel):
|
||||
"""Response from retrieving the contents of a vector store file."""
|
||||
"""Response from retrieving the contents of a vector store file.
|
||||
|
||||
:param file_id: Unique identifier for the file
|
||||
:param filename: Name of the file
|
||||
:param attributes: Key-value attributes associated with the file
|
||||
:param content: List of content items from the file
|
||||
"""
|
||||
|
||||
file_id: str
|
||||
filename: str
|
||||
|
@ -282,7 +414,12 @@ class VectorStoreFileContentsResponse(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class VectorStoreFileDeleteResponse(BaseModel):
|
||||
"""Response from deleting a vector store file."""
|
||||
"""Response from deleting a vector store file.
|
||||
|
||||
:param id: Unique identifier of the deleted file
|
||||
:param object: Object type identifier for the deletion response
|
||||
:param deleted: Whether the deletion operation was successful
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.file.deleted"
|
||||
|
@ -478,6 +615,11 @@ class VectorIO(Protocol):
|
|||
"""List files in a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to list files from.
|
||||
:param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:param filter: (Optional) Filter by file status to only return files with the specified status.
|
||||
:returns: A VectorStoreListFilesResponse containing the list of files.
|
||||
"""
|
||||
...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue