diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..bee8a64b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/source/agentic_system_types.py b/source/agentic_system_types.py new file mode 100644 index 000000000..f5548a9be --- /dev/null +++ b/source/agentic_system_types.py @@ -0,0 +1,88 @@ +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Dict, List, Optional, Set, Union + +from model_types import ( + BuiltinTool, + Content, + InstructModel, + Message, + PretrainedModel, + SamplingParams, + StopReason, + ToolCall, + ToolDefinition, + ToolResponse, +) + +from strong_typing.schema import json_schema_type + + +class ExecutionStepType(Enum): + """The type of execution step.""" + + model_inference = "model_inference" + tool_execution = "tool_execution" + safety_filtering = "safety_filtering" + memory_retrieval = "memory_retrieval" + + +@dataclass +class ExecutionStepBase: + """An agentic system turn can consist of one or more such execution steps.""" + + step_type: ExecutionStepType + + +@dataclass +class ModelInferenceStep(ExecutionStepBase): + step_type = ExecutionStepType.model_inference + text: str + logprobs: Optional[Dict[str, Any]] = None + + +@dataclass +class ToolExecutionStep(ExecutionStepBase): + step_type = ExecutionStepType.tool_execution + + # we could be calling multiple tools in a single step (in parallel) + tool_calls: List[ToolCall] + tool_responses: List[ToolResponse] + + +@dataclass +class SafetyViolation: + violation_type: str + details: str + suggested_user_response: Optional[str] = None + + +@dataclass +class SafetyFilteringStep(ExecutionStepBase): + step_type = ExecutionStepType.safety_filtering + violation: Optional[SafetyViolation] = None + + +@dataclass +class MemoryRetrievalStep(ExecutionStepBase): + step_type = ExecutionStepType.memory_retrieval + documents: List[str] + scores: List[float] + + +ExecutionStep = Union[ + ModelInferenceStep, + ToolExecutionStep, + SafetyFilteringStep, + MemoryRetrievalStep, +] + + +@json_schema_type +@dataclass +class AgenticSystemTurn: + """A single turn in an interaction with an Agentic System.""" + + user_messages: List[Message] + steps: List[ExecutionStep] + response_message: Message diff --git a/source/api_definitions.py b/source/api_definitions.py new file mode 100644 index 000000000..71d8dbf4a --- /dev/null +++ b/source/api_definitions.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Dict, List, Optional, Protocol, Set, Union + +import yaml + +from model_types import ( + BuiltinTool, + Content, + InstructModel, + Message, + PretrainedModel, + SamplingParams, + StopReason, + ToolCall, + ToolDefinition, + ToolResponse, +) +from agentic_system_types import ( + AgenticSystemTurn, +) + +from pyopenapi import Info, Options, Server, Specification, webmethod +from strong_typing.schema import json_schema_type + + +@json_schema_type +@dataclass +class CompletionRequest: + content: Content + model: PretrainedModel + sampling_params: SamplingParams = SamplingParams() + max_tokens: int = 0 + stream: bool = False + logprobs: bool = False + + +@json_schema_type +@dataclass +class CompletionResponse: + """Normal completion response.""" + + content: Content + stop_reason: Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class CompletionResponseStreamChunk: + """streamed completion response.""" + + text_delta: str + stop_reason: 
Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class ChatCompletionRequest: + message: Message + model: InstructModel + message_history: List[Message] = None + sampling_params: SamplingParams = SamplingParams() + + # zero-shot tool definitions as input to the model + available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( + default_factory=list + ) + + max_tokens: int = 0 + stream: bool = False + logprobs: bool = False + + +@json_schema_type +@dataclass +class ChatCompletionResponse: + """Normal chat completion response.""" + + content: Content + + # note: multiple tool calls can be generated in a single response + tool_calls: List[ToolCall] = field(default_factory=list) + + stop_reason: Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class ChatCompletionResponseStreamChunk: + """Streamed chat completion response. The actual response is a series of such objects.""" + + text_delta: str + stop_reason: Optional[StopReason] = None + tool_call: Optional[ToolCall] = None + + +class Inference(Protocol): + + def post_completion( + self, + request: CompletionRequest, + ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... + + def post_chat_completion( + self, + request: ChatCompletionRequest, + ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... + + +@dataclass +class AgenticSystemCreateRequest: + instructions: str + model: InstructModel + + # zero-shot tool definitions as input to the model + available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( + default_factory=list + ) + + executable_tools: Set[str] = field(default_factory=set) + + +@json_schema_type +@dataclass +class AgenticSystemCreateResponse: + agent_id: str + + +@json_schema_type +@dataclass +class AgenticSystemExecuteRequest: + agent_id: str + messages: List[Message] + turn_history: List[AgenticSystemTurn] = None + stream: bool = False + + +@json_schema_type +@dataclass +class AgenticSystemExecuteResponse: + """non-stream response from the agentic system.""" + + turn: AgenticSystemTurn + + +@json_schema_type +@dataclass +class AgenticSystemExecuteResponseStreamChunk: + """Streamed agent execution response.""" + + # TODO: make things streamable + turn: AgenticSystemTurn + + stop_reason: Optional[StopReason] = None + + +class AgenticSystem(Protocol): + + @webmethod(route="/agentic_system/create") + def create_agentic_system( + self, + request: AgenticSystemCreateRequest, + ) -> AgenticSystemCreateResponse: ... + + @webmethod(route="/agentic_system/execute") + def create_agentic_system_execute( + self, + request: AgenticSystemExecuteRequest, + ) -> Union[ + AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk + ]: ... + + +class LlamaStackEndpoints(Inference, AgenticSystem): ... 
+ + +if __name__ == "__main__": + print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)") + spec = Specification( + LlamaStackEndpoints, + Options( + server=Server(url="http://llama.meta.com"), + info=Info( + title="Llama Stack specification", + version="0.1", + description="This is the llama stack", + ), + ), + ) + with open("openapi.yaml", "w", encoding="utf-8") as fp: + yaml.dump(spec.get_json(), fp, allow_unicode=True) + + with open("openapi.html", "w") as fp: + spec.write_html(fp, pretty_print=True) diff --git a/source/defn.py b/source/defn.py deleted file mode 100644 index 619a95e7f..000000000 --- a/source/defn.py +++ /dev/null @@ -1,271 +0,0 @@ -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Protocol, Set, Union - -import yaml - -from pyopenapi import Info, Options, Server, Specification, webmethod -from strong_typing.schema import json_schema_type - - -@json_schema_type( - schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"} -) -@dataclass -class URL: - url: str - - def __str__(self) -> str: - return self.url - - -@json_schema_type -@dataclass -class Attachment: - """ - Attachments are used to refer to external resources, such as images, videos, audio, etc. - - """ - - url: URL - mime_type: str - - -Content = Union[ - str, - Attachment, - List[Union[str, Attachment]], -] - - -class Role(Enum): - system = "system" - user = "user" - assistant = "assistant" - tool = "tool" - - -@dataclass -class ToolCall: - """ - A tool call is a request to a tool. - """ - - tool_name: str - arguments: Dict[str, Any] - - -@dataclass -class ToolResponse: - tool_name: str - response: str - - -@dataclass -class ToolDefinition: - tool_name: str - parameters: Dict[str, Any] - - -# TODO: we need to document the parameters for the tool calls -class BuiltinTool(Enum): - """ - Builtin tools are tools the model is natively aware of and was potentially fine-tuned with. - """ - - web_search = "web_search" - math = "math" - image_gen = "image_gen" - code_interpreter = "code_interpreter" - - -class StopReason(Enum): - """ - Stop reasons are used to indicate why the model stopped generating text. 
- """ - - not_stopped = "not_stopped" - finished_ok = "finished_ok" - max_tokens = "max_tokens" - - -@json_schema_type -@dataclass -class Message: - role: Role - - # input to the model or output from the model - content: Content - - # output from the model - tool_calls: List[ToolCall] = field(default_factory=list) - - # input to the model - tool_responses: List[ToolResponse] = field(default_factory=list) - - -@dataclass -class SamplingParams: - temperature: float = 0.0 - strategy: str = "greedy" - top_p: float = 0.95 - top_k: int = 0 - - -class PretrainedModel(Enum): - llama3_8b = "llama3_8b" - llama3_70b = "llama3_70b" - - -class InstructModel(Enum): - llama3_8b_chat = "llama3_8b_chat" - llama3_70b_chat = "llama3_70b_chat" - - -@json_schema_type -@dataclass -class CompletionRequest: - content: Content - model: PretrainedModel = PretrainedModel.llama3_8b - sampling_params: SamplingParams = SamplingParams() - max_tokens: int = 0 - stream: bool = False - logprobs: bool = False - - -@json_schema_type -@dataclass -class CompletionResponse: - """Normal completion response.""" - - content: Content - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class StreamedCompletionResponse: - """streamed completion response.""" - - text_delta: str - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@dataclass -class ChatCompletionRequestCommon: - message: Message - message_history: List[Message] = None - model: InstructModel = InstructModel.llama3_8b_chat - sampling_params: SamplingParams = SamplingParams() - - # zero-shot tool definitions as input to the model - available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( - default_factory=list - ) - - -@json_schema_type -@dataclass -class ChatCompletionRequest(ChatCompletionRequestCommon): - max_tokens: int = 0 - stream: bool = False - logprobs: bool = False - - -@json_schema_type -@dataclass -class ChatCompletionResponse: - """Normal chat completion response.""" - - content: Content - - # note: multiple tool calls can be generated in a single response - tool_calls: List[ToolCall] = field(default_factory=list) - - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class StreamedChatCompletionResponse: - """Streamed chat completion response.""" - - text_delta: str - stop_reason: Optional[StopReason] = None - tool_call: Optional[ToolCall] = None - - -class Inference(Protocol): - - def post_completion( - self, - request: CompletionRequest, - ) -> Union[CompletionResponse, StreamedCompletionResponse]: ... - - def post_chat_completion( - self, - request: ChatCompletionRequest, - ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ... 
- - -@json_schema_type -@dataclass -class AgenticSystemExecuteRequest(ChatCompletionRequestCommon): - executable_tools: Set[str] = field(default_factory=set) - stream: bool = False - - -@json_schema_type -@dataclass -class AgenticSystemExecuteResponse: - """Normal chat completion response.""" - - content: Content - stop_reason: StopReason - tool_calls: List[ToolCall] = field(default_factory=list) - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class StreamedAgenticSystemExecuteResponse: - """Streamed chat completion response.""" - - text_delta: str - stop_reason: StopReason - tool_call: Optional[ToolCall] = None - - -class AgenticSystem(Protocol): - - @webmethod(route="/agentic/system/execute") - def create_agentic_system_execute( - self, - request: AgenticSystemExecuteRequest, - ) -> Union[AgenticSystemExecuteResponse, StreamedAgenticSystemExecuteResponse]: ... - - -class Endpoint(Inference, AgenticSystem): ... - - -if __name__ == "__main__": - print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)") - spec = Specification( - Endpoint, - Options( - server=Server(url="http://llama.meta.com"), - info=Info( - title="Llama Stack specification", - version="0.1", - description="This is the llama stack", - ), - ), - ) - with open("openapi.yaml", "w", encoding="utf-8") as fp: - yaml.dump(spec.get_json(), fp, allow_unicode=True) - - with open("openapi.html", "w") as fp: - spec.write_html(fp, pretty_print=True) diff --git a/source/model_types.py b/source/model_types.py new file mode 100644 index 000000000..bb7e09971 --- /dev/null +++ b/source/model_types.py @@ -0,0 +1,122 @@ +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Dict, List, Optional, Set, Union + +from strong_typing.schema import json_schema_type + + +@json_schema_type( + schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"} +) +@dataclass +class URL: + url: str + + def __str__(self) -> str: + return self.url + + +@json_schema_type +@dataclass +class Attachment: + """ + Attachments are used to refer to external resources, such as images, videos, audio, etc. + + """ + + url: URL + mime_type: str + + +Content = Union[ + str, + Attachment, + List[Union[str, Attachment]], +] + + +class Role(Enum): + system = "system" + user = "user" + assistant = "assistant" + tool = "tool" + + +@dataclass +class ToolCall: + """ + A tool call is a request to a tool. + """ + + tool_name: str + arguments: Dict[str, Any] + + +@dataclass +class ToolResponse: + tool_name: str + response: str + + +@dataclass +class ToolDefinition: + tool_name: str + parameters: Dict[str, Any] + + +# TODO: we need to document the parameters for the tool calls +class BuiltinTool(Enum): + """ + Builtin tools are tools the model is natively aware of and was potentially fine-tuned with. + """ + + web_search = "web_search" + math = "math" + image_gen = "image_gen" + code_interpreter = "code_interpreter" + + +class StopReason(Enum): + """ + Stop reasons are used to indicate why the model stopped generating text. 
+ """ + + not_stopped = "not_stopped" + finished_ok = "finished_ok" + max_tokens = "max_tokens" + + +@json_schema_type +@dataclass +class Message: + role: Role + + # input to the model or output from the model + content: Content + + # output from the model + tool_calls: List[ToolCall] = field(default_factory=list) + + # input to the model + tool_responses: List[ToolResponse] = field(default_factory=list) + + +@dataclass +class SamplingParams: + temperature: float = 0.0 + strategy: str = "greedy" + top_p: float = 0.95 + top_k: int = 0 + + +class PretrainedModel(Enum): + llama3_8b = "llama3_8b" + llama3_70b = "llama3_70b" + + +class InstructModel(Enum): + llama3_8b_chat = "llama3_8b_chat" + llama3_70b_chat = "llama3_70b_chat" + + + diff --git a/source/openapi.html b/source/openapi.html index 328d73edc..3826377cf 100644 --- a/source/openapi.html +++ b/source/openapi.html @@ -29,11 +29,41 @@ } ], "paths": { - "/agentic/system/execute": { + "/agentic_system/create": { "post": { "responses": { "200": { - "description": "Normal chat completion response. **OR** Streamed chat completion response.", + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgenticSystemCreateResponse" + } + } + } + } + }, + "tags": [ + "AgenticSystem" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgenticSystemCreateRequest" + } + } + }, + "required": true + } + } + }, + "/agentic_system/execute": { + "post": { + "responses": { + "200": { + "description": "non-stream response from the agentic system. **OR** Streamed agent execution response.", "content": { "application/json": { "schema": { @@ -42,7 +72,7 @@ "$ref": "#/components/schemas/AgenticSystemExecuteResponse" }, { - "$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse" + "$ref": "#/components/schemas/AgenticSystemExecuteResponseStreamChunk" } ] } @@ -70,7 +100,7 @@ "post": { "responses": { "200": { - "description": "Normal chat completion response. **OR** Streamed chat completion response.", + "description": "Normal chat completion response. **OR** Streamed chat completion response. 
The actual response is a series of such objects.", "content": { "application/json": { "schema": { @@ -79,7 +109,7 @@ "$ref": "#/components/schemas/ChatCompletionResponse" }, { - "$ref": "#/components/schemas/StreamedChatCompletionResponse" + "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" } ] } @@ -116,7 +146,7 @@ "$ref": "#/components/schemas/CompletionResponse" }, { - "$ref": "#/components/schemas/StreamedCompletionResponse" + "$ref": "#/components/schemas/CompletionResponseStreamChunk" } ] } @@ -144,52 +174,17 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { - "AgenticSystemExecuteRequest": { + "AgenticSystemCreateRequest": { "type": "object", "properties": { - "message": { - "$ref": "#/components/schemas/Message" - }, - "message_history": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - } + "instructions": { + "type": "string" }, "model": { "type": "string", "enum": [ "llama3_8b_chat", "llama3_70b_chat" - ], - "default": "llama3_8b_chat" - }, - "sampling_params": { - "type": "object", - "properties": { - "temperature": { - "type": "number", - "default": 0.0 - }, - "strategy": { - "type": "string", - "default": "greedy" - }, - "top_p": { - "type": "number", - "default": 0.95 - }, - "top_k": { - "type": "integer", - "default": 0 - } - }, - "additionalProperties": false, - "required": [ - "temperature", - "strategy", - "top_p", - "top_k" ] }, "available_tools": { @@ -253,6 +248,45 @@ "type": "string" }, "uniqueItems": true + } + }, + "additionalProperties": false, + "required": [ + "instructions", + "model", + "available_tools", + "executable_tools" + ] + }, + "AgenticSystemCreateResponse": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "agent_id" + ] + }, + "AgenticSystemExecuteRequest": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "turn_history": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgenticSystemTurn" + } }, "stream": { "type": "boolean", @@ -261,15 +295,245 @@ }, "additionalProperties": false, "required": [ - "message", - "message_history", - "model", - "sampling_params", - "available_tools", - "executable_tools", + "agent_id", + "messages", + "turn_history", "stream" ] }, + "AgenticSystemTurn": { + "type": "object", + "properties": { + "user_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "steps": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "step_type": { + "type": "string", + "enum": [ + "model_inference", + "tool_execution", + "safety_filtering", + "memory_retrieval" + ], + "title": "The type of execution step.", + "default": "model_inference" + }, + "text": { + "type": "string" + }, + "logprobs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "step_type", + "text" + ] + }, + { + "type": "object", + "properties": { + "step_type": { + "type": "string", + "enum": [ + "model_inference", + "tool_execution", + "safety_filtering", + "memory_retrieval" + ], + "title": "The type of execution step.", + 
"default": "tool_execution" + }, + "tool_calls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "arguments" + ], + "title": "A tool call is a request to a tool." + } + }, + "tool_responses": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "response": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "response" + ] + } + } + }, + "additionalProperties": false, + "required": [ + "step_type", + "tool_calls", + "tool_responses" + ] + }, + { + "type": "object", + "properties": { + "step_type": { + "type": "string", + "enum": [ + "model_inference", + "tool_execution", + "safety_filtering", + "memory_retrieval" + ], + "title": "The type of execution step.", + "default": "safety_filtering" + }, + "violation": { + "type": "object", + "properties": { + "violation_type": { + "type": "string" + }, + "details": { + "type": "string" + }, + "suggested_user_response": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "violation_type", + "details" + ] + } + }, + "additionalProperties": false, + "required": [ + "step_type" + ] + }, + { + "type": "object", + "properties": { + "step_type": { + "type": "string", + "enum": [ + "model_inference", + "tool_execution", + "safety_filtering", + "memory_retrieval" + ], + "title": "The type of execution step.", + "default": "memory_retrieval" + }, + "documents": { + "type": "array", + "items": { + "type": "string" + } + }, + "scores": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "additionalProperties": false, + "required": [ + "step_type", + "documents", + "scores" + ] + } + ] + } + }, + "response_message": { + "$ref": "#/components/schemas/Message" + } + }, + "additionalProperties": false, + "required": [ + "user_messages", + "steps", + "response_message" + ], + "title": "A single turn in an interaction with an Agentic System." + }, "Attachment": { "type": "object", "properties": { @@ -400,119 +664,21 @@ "AgenticSystemExecuteResponse": { "type": "object", "properties": { - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - } - ] - } - } - ] - }, - "stop_reason": { - "type": "string", - "enum": [ - "not_stopped", - "finished_ok", - "max_tokens" - ], - "title": "Stop reasons are used to indicate why the model stopped generating text." - }, - "tool_calls": { - "type": "array", - "items": { - "type": "object", - "properties": { - "tool_name": { - "type": "string" - }, - "arguments": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "arguments" - ], - "title": "A tool call is a request to a tool." 
- } - }, - "logprobs": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } + "turn": { + "$ref": "#/components/schemas/AgenticSystemTurn" } }, "additionalProperties": false, "required": [ - "content", - "stop_reason", - "tool_calls" + "turn" ], - "title": "Normal chat completion response." + "title": "non-stream response from the agentic system." }, - "StreamedAgenticSystemExecuteResponse": { + "AgenticSystemExecuteResponseStreamChunk": { "type": "object", "properties": { - "text_delta": { - "type": "string" + "turn": { + "$ref": "#/components/schemas/AgenticSystemTurn" }, "stop_reason": { "type": "string", @@ -522,53 +688,13 @@ "max_tokens" ], "title": "Stop reasons are used to indicate why the model stopped generating text." - }, - "tool_call": { - "type": "object", - "properties": { - "tool_name": { - "type": "string" - }, - "arguments": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "arguments" - ], - "title": "A tool call is a request to a tool." } }, "additionalProperties": false, "required": [ - "text_delta", - "stop_reason" + "turn" ], - "title": "Streamed chat completion response." + "title": "Streamed agent execution response." }, "ChatCompletionRequest": { "type": "object", @@ -576,19 +702,18 @@ "message": { "$ref": "#/components/schemas/Message" }, - "message_history": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - } - }, "model": { "type": "string", "enum": [ "llama3_8b_chat", "llama3_70b_chat" - ], - "default": "llama3_8b_chat" + ] + }, + "message_history": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } }, "sampling_params": { "type": "object", @@ -689,8 +814,8 @@ "additionalProperties": false, "required": [ "message", - "message_history", "model", + "message_history", "sampling_params", "available_tools", "max_tokens", @@ -808,7 +933,7 @@ ], "title": "Normal chat completion response." }, - "StreamedChatCompletionResponse": { + "ChatCompletionResponseStreamChunk": { "type": "object", "properties": { "text_delta": { @@ -867,7 +992,7 @@ "required": [ "text_delta" ], - "title": "Streamed chat completion response." + "title": "Streamed chat completion response. The actual response is a series of such objects." }, "CompletionRequest": { "type": "object", @@ -900,8 +1025,7 @@ "enum": [ "llama3_8b", "llama3_70b" - ], - "default": "llama3_8b" + ] }, "sampling_params": { "type": "object", @@ -1021,7 +1145,7 @@ ], "title": "Normal completion response." 
}, - "StreamedCompletionResponse": { + "CompletionResponseStreamChunk": { "type": "object", "properties": { "text_delta": { @@ -1083,10 +1207,22 @@ { "name": "AgenticSystem" }, + { + "name": "AgenticSystemCreateRequest", + "description": "" + }, + { + "name": "AgenticSystemCreateResponse", + "description": "" + }, { "name": "AgenticSystemExecuteRequest", "description": "" }, + { + "name": "AgenticSystemTurn", + "description": "A single turn in an interaction with an Agentic System.\n\n" + }, { "name": "Attachment", "description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n" @@ -1101,11 +1237,11 @@ }, { "name": "AgenticSystemExecuteResponse", - "description": "Normal chat completion response.\n\n" + "description": "non-stream response from the agentic system.\n\n" }, { - "name": "StreamedAgenticSystemExecuteResponse", - "description": "Streamed chat completion response.\n\n" + "name": "AgenticSystemExecuteResponseStreamChunk", + "description": "Streamed agent execution response.\n\n" }, { "name": "ChatCompletionRequest", @@ -1116,8 +1252,8 @@ "description": "Normal chat completion response.\n\n" }, { - "name": "StreamedChatCompletionResponse", - "description": "Streamed chat completion response.\n\n" + "name": "ChatCompletionResponseStreamChunk", + "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n" }, { "name": "CompletionRequest", @@ -1128,8 +1264,8 @@ "description": "Normal completion response.\n\n" }, { - "name": "StreamedCompletionResponse", - "description": "streamed completion response.\n\n" + "name": "CompletionResponseStreamChunk", + "description": "streamed completion response.\n\n" } ], "x-tagGroups": [ @@ -1143,17 +1279,20 @@ { "name": "Types", "tags": [ + "AgenticSystemCreateRequest", + "AgenticSystemCreateResponse", "AgenticSystemExecuteRequest", "AgenticSystemExecuteResponse", + "AgenticSystemExecuteResponseStreamChunk", + "AgenticSystemTurn", "Attachment", "ChatCompletionRequest", "ChatCompletionResponse", + "ChatCompletionResponseStreamChunk", "CompletionRequest", "CompletionResponse", + "CompletionResponseStreamChunk", "Message", - "StreamedAgenticSystemExecuteResponse", - "StreamedChatCompletionResponse", - "StreamedCompletionResponse", "URL" ] } diff --git a/source/openapi.yaml b/source/openapi.yaml index d0b4fc170..45cb2c8c2 100644 --- a/source/openapi.yaml +++ b/source/openapi.yaml @@ -1,7 +1,7 @@ components: responses: {} schemas: - AgenticSystemExecuteRequest: + AgenticSystemCreateRequest: additionalProperties: false properties: available_tools: @@ -39,73 +39,61 @@ components: type: string type: array uniqueItems: true - message: - $ref: '#/components/schemas/Message' - message_history: - items: - $ref: '#/components/schemas/Message' - type: array + instructions: + type: string model: - default: llama3_8b_chat enum: - llama3_8b_chat - llama3_70b_chat type: string - sampling_params: - additionalProperties: false - properties: - strategy: - default: greedy - type: string - temperature: - default: 0.0 - type: number - top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number - required: - - temperature - - strategy - - top_p - - top_k - type: object + required: + - instructions + - model + - available_tools + - executable_tools + type: object + AgenticSystemCreateResponse: + additionalProperties: false + properties: + agent_id: + type: string + required: + - agent_id + type: object + AgenticSystemExecuteRequest: + additionalProperties: false + 
properties: + agent_id: + type: string + messages: + items: + $ref: '#/components/schemas/Message' + type: array stream: default: false type: boolean + turn_history: + items: + $ref: '#/components/schemas/AgenticSystemTurn' + type: array required: - - message - - message_history - - model - - sampling_params - - available_tools - - executable_tools + - agent_id + - messages + - turn_history - stream type: object AgenticSystemExecuteResponse: additionalProperties: false properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object + turn: + $ref: '#/components/schemas/AgenticSystemTurn' + required: + - turn + title: non-stream response from the agentic system. + type: object + AgenticSystemExecuteResponseStreamChunk: + additionalProperties: false + properties: stop_reason: enum: - not_stopped @@ -114,33 +102,159 @@ components: title: Stop reasons are used to indicate why the model stopped generating text. type: string - tool_calls: + turn: + $ref: '#/components/schemas/AgenticSystemTurn' + required: + - turn + title: Streamed agent execution response. + type: object + AgenticSystemTurn: + additionalProperties: false + properties: + response_message: + $ref: '#/components/schemas/Message' + steps: items: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object + oneOf: + - additionalProperties: false + properties: + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + step_type: + default: model_inference + enum: + - model_inference + - tool_execution + - safety_filtering + - memory_retrieval + title: The type of execution step. + type: string + text: + type: string + required: + - step_type + - text + type: object + - additionalProperties: false + properties: + step_type: + default: tool_execution + enum: + - model_inference + - tool_execution + - safety_filtering + - memory_retrieval + title: The type of execution step. + type: string + tool_calls: + items: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + type: array + tool_responses: + items: + additionalProperties: false + properties: + response: + type: string + tool_name: + type: string + required: + - tool_name + - response + type: object + type: array + required: + - step_type + - tool_calls + - tool_responses + type: object + - additionalProperties: false + properties: + step_type: + default: safety_filtering + enum: + - model_inference + - tool_execution + - safety_filtering + - memory_retrieval + title: The type of execution step. 
+ type: string + violation: + additionalProperties: false + properties: + details: + type: string + suggested_user_response: + type: string + violation_type: + type: string + required: + - violation_type + - details + type: object + required: + - step_type + type: object + - additionalProperties: false + properties: + documents: + items: + type: string + type: array + scores: + items: + type: number + type: array + step_type: + default: memory_retrieval + enum: + - model_inference + - tool_execution + - safety_filtering + - memory_retrieval + title: The type of execution step. + type: string + required: + - step_type + - documents + - scores + type: object + type: array + user_messages: + items: + $ref: '#/components/schemas/Message' type: array required: - - content - - stop_reason - - tool_calls - title: Normal chat completion response. + - user_messages + - steps + - response_message + title: A single turn in an interaction with an Agentic System. type: object Attachment: additionalProperties: false @@ -201,7 +315,6 @@ components: $ref: '#/components/schemas/Message' type: array model: - default: llama3_8b_chat enum: - llama3_8b_chat - llama3_70b_chat @@ -232,8 +345,8 @@ components: type: boolean required: - message - - message_history - model + - message_history - sampling_params - available_tools - max_tokens @@ -297,6 +410,44 @@ components: - tool_calls title: Normal chat completion response. type: object + ChatCompletionResponseStreamChunk: + additionalProperties: false + properties: + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. + type: string + text_delta: + type: string + tool_call: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + required: + - text_delta + title: Streamed chat completion response. The actual response is a series of + such objects. + type: object CompletionRequest: additionalProperties: false properties: @@ -316,7 +467,6 @@ components: default: 0 type: integer model: - default: llama3_8b enum: - llama3_8b - llama3_70b @@ -387,6 +537,33 @@ components: - content title: Normal completion response. type: object + CompletionResponseStreamChunk: + additionalProperties: false + properties: + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. + type: string + text_delta: + type: string + required: + - text_delta + title: streamed completion response. + type: object Message: additionalProperties: false properties: @@ -447,108 +624,6 @@ components: - tool_calls - tool_responses type: object - StreamedAgenticSystemExecuteResponse: - additionalProperties: false - properties: - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. 
- type: string - text_delta: - type: string - tool_call: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - required: - - text_delta - - stop_reason - title: Streamed chat completion response. - type: object - StreamedChatCompletionResponse: - additionalProperties: false - properties: - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - text_delta: - type: string - tool_call: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - required: - - text_delta - title: Streamed chat completion response. - type: object - StreamedCompletionResponse: - additionalProperties: false - properties: - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - text_delta: - type: string - required: - - text_delta - title: streamed completion response. - type: object URL: format: uri pattern: ^(https?://|file://|data:) @@ -560,7 +635,25 @@ info: jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema openapi: 3.1.0 paths: - /agentic/system/execute: + /agentic_system/create: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AgenticSystemCreateRequest' + required: true + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/AgenticSystemCreateResponse' + description: OK + tags: + - AgenticSystem + /agentic_system/execute: post: parameters: [] requestBody: @@ -576,9 +669,9 @@ paths: schema: oneOf: - $ref: '#/components/schemas/AgenticSystemExecuteResponse' - - $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse' - description: Normal chat completion response. **OR** Streamed chat completion - response. + - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk' + description: non-stream response from the agentic system. **OR** Streamed + agent execution response. tags: - AgenticSystem /chat_completion: @@ -597,9 +690,9 @@ paths: schema: oneOf: - $ref: '#/components/schemas/ChatCompletionResponse' - - $ref: '#/components/schemas/StreamedChatCompletionResponse' + - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' description: Normal chat completion response. **OR** Streamed chat completion - response. + response. The actual response is a series of such objects. tags: - Inference /completion: @@ -618,7 +711,7 @@ paths: schema: oneOf: - $ref: '#/components/schemas/CompletionResponse' - - $ref: '#/components/schemas/StreamedCompletionResponse' + - $ref: '#/components/schemas/CompletionResponseStreamChunk' description: Normal completion response. **OR** streamed completion response. 
tags: - Inference @@ -629,9 +722,20 @@ servers: tags: - name: Inference - name: AgenticSystem +- description: + name: AgenticSystemCreateRequest +- description: + name: AgenticSystemCreateResponse - description: name: AgenticSystemExecuteRequest +- description: 'A single turn in an interaction with an Agentic System. + + + ' + name: AgenticSystemTurn - description: 'Attachments are used to refer to external resources, such as images, videos, audio, etc. @@ -642,18 +746,18 @@ tags: name: Message - description: name: URL -- description: 'Normal chat completion response. +- description: 'non-stream response from the agentic system. ' name: AgenticSystemExecuteResponse -- description: 'Streamed chat completion response. +- description: 'Streamed agent execution response. - ' - name: StreamedAgenticSystemExecuteResponse + name: AgenticSystemExecuteResponseStreamChunk - description: name: ChatCompletionRequest @@ -662,12 +766,13 @@ tags: ' name: ChatCompletionResponse -- description: 'Streamed chat completion response. +- description: 'Streamed chat completion response. The actual response is a series + of such objects. - ' - name: StreamedChatCompletionResponse + name: ChatCompletionResponseStreamChunk - description: name: CompletionRequest @@ -679,9 +784,9 @@ tags: - description: 'streamed completion response. - ' - name: StreamedCompletionResponse + name: CompletionResponseStreamChunk x-tagGroups: - name: Operations tags: @@ -689,15 +794,18 @@ x-tagGroups: - Inference - name: Types tags: + - AgenticSystemCreateRequest + - AgenticSystemCreateResponse - AgenticSystemExecuteRequest - AgenticSystemExecuteResponse + - AgenticSystemExecuteResponseStreamChunk + - AgenticSystemTurn - Attachment - ChatCompletionRequest - ChatCompletionResponse + - ChatCompletionResponseStreamChunk - CompletionRequest - CompletionResponse + - CompletionResponseStreamChunk - Message - - StreamedAgenticSystemExecuteResponse - - StreamedChatCompletionResponse - - StreamedCompletionResponse - URL diff --git a/source/run.sh b/source/run.sh index 980d979d2..b30929cfb 100644 --- a/source/run.sh +++ b/source/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -PYTHONPATH=. python3 defn.py +PYTHONPATH=. python3 api_definitions.py
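
For reference, a minimal sketch of how the new turn/step types introduced by this patch compose. This is illustrative only and not part of the change: it assumes the repository's dependencies (the strong_typing package) are installed and that source/ is on PYTHONPATH, as source/run.sh sets up, and the math tool call and its argument values are made-up examples.

    # Illustrative sketch only -- not part of this patch.
    # Assumes strong_typing is installed and source/ is on PYTHONPATH (see source/run.sh).
    from agentic_system_types import (
        AgenticSystemTurn,
        ExecutionStepType,
        ModelInferenceStep,
        ToolExecutionStep,
    )
    from model_types import Message, Role, ToolCall, ToolResponse

    # One turn: the user asks a question, the agent calls the builtin math tool,
    # then runs a final model inference step to phrase the answer.
    user_message = Message(role=Role.user, content="What is 7 * 6?")

    tool_step = ToolExecutionStep(
        step_type=ExecutionStepType.tool_execution,
        tool_calls=[ToolCall(tool_name="math", arguments={"expression": "7 * 6"})],
        tool_responses=[ToolResponse(tool_name="math", response="42")],
    )

    inference_step = ModelInferenceStep(
        step_type=ExecutionStepType.model_inference,
        text="7 * 6 = 42",
    )

    turn = AgenticSystemTurn(
        user_messages=[user_message],
        steps=[tool_step, inference_step],
        response_message=Message(role=Role.assistant, content="7 * 6 = 42"),
    )

    print([step.step_type.value for step in turn.steps])
    # ['tool_execution', 'model_inference']

Because ExecutionStep is a plain Union, consumers dispatch on step_type; that is also why each step variant in the generated openapi.yaml oneOf above repeats the same enum with a different default value.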