diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..bee8a64b7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/source/agentic_system_types.py b/source/agentic_system_types.py
new file mode 100644
index 000000000..f5548a9be
--- /dev/null
+++ b/source/agentic_system_types.py
@@ -0,0 +1,88 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Set, Union
+
+from model_types import (
+ BuiltinTool,
+ Content,
+ InstructModel,
+ Message,
+ PretrainedModel,
+ SamplingParams,
+ StopReason,
+ ToolCall,
+ ToolDefinition,
+ ToolResponse,
+)
+
+from strong_typing.schema import json_schema_type
+
+
+class ExecutionStepType(Enum):
+ """The type of execution step."""
+
+ model_inference = "model_inference"
+ tool_execution = "tool_execution"
+ safety_filtering = "safety_filtering"
+ memory_retrieval = "memory_retrieval"
+
+
+@dataclass
+class ExecutionStepBase:
+ """An agentic system turn can consist of one or more such execution steps."""
+
+ step_type: ExecutionStepType
+
+
+@dataclass
+class ModelInferenceStep(ExecutionStepBase):
+ step_type = ExecutionStepType.model_inference
+ text: str
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@dataclass
+class ToolExecutionStep(ExecutionStepBase):
+ step_type = ExecutionStepType.tool_execution
+
+ # we could be calling multiple tools in a single step (in parallel)
+ tool_calls: List[ToolCall]
+ tool_responses: List[ToolResponse]
+
+
+@dataclass
+class SafetyViolation:
+ violation_type: str
+ details: str
+ suggested_user_response: Optional[str] = None
+
+
+@dataclass
+class SafetyFilteringStep(ExecutionStepBase):
+ step_type = ExecutionStepType.safety_filtering
+ violation: Optional[SafetyViolation] = None
+
+
+@dataclass
+class MemoryRetrievalStep(ExecutionStepBase):
+ step_type = ExecutionStepType.memory_retrieval
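+    # parallel lists: scores[i] is presumably the retrieval relevance score for documents[i]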
+ documents: List[str]
+ scores: List[float]
+
+
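+# Union of the concrete step types; each variant pins step_type to its matching ExecutionStepType value.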
+ExecutionStep = Union[
+ ModelInferenceStep,
+ ToolExecutionStep,
+ SafetyFilteringStep,
+ MemoryRetrievalStep,
+]
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemTurn:
+ """A single turn in an interaction with an Agentic System."""
+
+ user_messages: List[Message]
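+    # the execution steps (model inference, tool execution, safety filtering, memory retrieval) taken to produce response_message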
+ steps: List[ExecutionStep]
+ response_message: Message
diff --git a/source/api_definitions.py b/source/api_definitions.py
new file mode 100644
index 000000000..71d8dbf4a
--- /dev/null
+++ b/source/api_definitions.py
@@ -0,0 +1,197 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Protocol, Set, Union
+
+import yaml
+
+from model_types import (
+ BuiltinTool,
+ Content,
+ InstructModel,
+ Message,
+ PretrainedModel,
+ SamplingParams,
+ StopReason,
+ ToolCall,
+ ToolDefinition,
+ ToolResponse,
+)
+from agentic_system_types import (
+ AgenticSystemTurn,
+)
+
+from pyopenapi import Info, Options, Server, Specification, webmethod
+from strong_typing.schema import json_schema_type
+
+
+@json_schema_type
+@dataclass
+class CompletionRequest:
+ content: Content
+ model: PretrainedModel
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+ max_tokens: int = 0
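+    # when True, the response is streamed back as a series of CompletionResponseStreamChunk objects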
+ stream: bool = False
+ logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class CompletionResponse:
+ """Normal completion response."""
+
+ content: Content
+ stop_reason: Optional[StopReason] = None
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class CompletionResponseStreamChunk:
+ """streamed completion response."""
+
+ text_delta: str
+ stop_reason: Optional[StopReason] = None
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionRequest:
+ message: Message
+ model: InstructModel
+    message_history: Optional[List[Message]] = None
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+
+ # zero-shot tool definitions as input to the model
+ available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
+ default_factory=list
+ )
+
+ max_tokens: int = 0
+ stream: bool = False
+ logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionResponse:
+ """Normal chat completion response."""
+
+ content: Content
+
+ # note: multiple tool calls can be generated in a single response
+ tool_calls: List[ToolCall] = field(default_factory=list)
+
+ stop_reason: Optional[StopReason] = None
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionResponseStreamChunk:
+ """Streamed chat completion response. The actual response is a series of such objects."""
+
+ text_delta: str
+ stop_reason: Optional[StopReason] = None
+ tool_call: Optional[ToolCall] = None
+
+
+class Inference(Protocol):
+
+ def post_completion(
+ self,
+ request: CompletionRequest,
+ ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
+
+ def post_chat_completion(
+ self,
+ request: ChatCompletionRequest,
+ ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
+
+
+@dataclass
+class AgenticSystemCreateRequest:
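+    # natural-language instructions for the agent, presumably used as its system prompt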
+ instructions: str
+ model: InstructModel
+
+ # zero-shot tool definitions as input to the model
+ available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
+ default_factory=list
+ )
+
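+    # presumably the names of tools the system itself may execute, as opposed to only declaring them to the model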
+ executable_tools: Set[str] = field(default_factory=set)
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemCreateResponse:
+ agent_id: str
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteRequest:
+ agent_id: str
+ messages: List[Message]
+    turn_history: Optional[List[AgenticSystemTurn]] = None
+ stream: bool = False
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteResponse:
+ """non-stream response from the agentic system."""
+
+ turn: AgenticSystemTurn
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteResponseStreamChunk:
+ """Streamed agent execution response."""
+
+ # TODO: make things streamable
+ turn: AgenticSystemTurn
+
+ stop_reason: Optional[StopReason] = None
+
+
+class AgenticSystem(Protocol):
+
+ @webmethod(route="/agentic_system/create")
+ def create_agentic_system(
+ self,
+ request: AgenticSystemCreateRequest,
+ ) -> AgenticSystemCreateResponse: ...
+
+ @webmethod(route="/agentic_system/execute")
+ def create_agentic_system_execute(
+ self,
+ request: AgenticSystemExecuteRequest,
+ ) -> Union[
+ AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk
+ ]: ...
+
+
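+# Combined API surface; passed to pyopenapi's Specification below to generate the OpenAPI spec.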
+class LlamaStackEndpoints(Inference, AgenticSystem): ...
+
+
+if __name__ == "__main__":
+ print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
+ spec = Specification(
+ LlamaStackEndpoints,
+ Options(
+ server=Server(url="http://llama.meta.com"),
+ info=Info(
+ title="Llama Stack specification",
+ version="0.1",
+ description="This is the llama stack",
+ ),
+ ),
+ )
+ with open("openapi.yaml", "w", encoding="utf-8") as fp:
+ yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+ with open("openapi.html", "w") as fp:
+ spec.write_html(fp, pretty_print=True)
diff --git a/source/defn.py b/source/defn.py
deleted file mode 100644
index 619a95e7f..000000000
--- a/source/defn.py
+++ /dev/null
@@ -1,271 +0,0 @@
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Optional, Protocol, Set, Union
-
-import yaml
-
-from pyopenapi import Info, Options, Server, Specification, webmethod
-from strong_typing.schema import json_schema_type
-
-
-@json_schema_type(
- schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
-)
-@dataclass
-class URL:
- url: str
-
- def __str__(self) -> str:
- return self.url
-
-
-@json_schema_type
-@dataclass
-class Attachment:
- """
- Attachments are used to refer to external resources, such as images, videos, audio, etc.
-
- """
-
- url: URL
- mime_type: str
-
-
-Content = Union[
- str,
- Attachment,
- List[Union[str, Attachment]],
-]
-
-
-class Role(Enum):
- system = "system"
- user = "user"
- assistant = "assistant"
- tool = "tool"
-
-
-@dataclass
-class ToolCall:
- """
- A tool call is a request to a tool.
- """
-
- tool_name: str
- arguments: Dict[str, Any]
-
-
-@dataclass
-class ToolResponse:
- tool_name: str
- response: str
-
-
-@dataclass
-class ToolDefinition:
- tool_name: str
- parameters: Dict[str, Any]
-
-
-# TODO: we need to document the parameters for the tool calls
-class BuiltinTool(Enum):
- """
- Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
- """
-
- web_search = "web_search"
- math = "math"
- image_gen = "image_gen"
- code_interpreter = "code_interpreter"
-
-
-class StopReason(Enum):
- """
- Stop reasons are used to indicate why the model stopped generating text.
- """
-
- not_stopped = "not_stopped"
- finished_ok = "finished_ok"
- max_tokens = "max_tokens"
-
-
-@json_schema_type
-@dataclass
-class Message:
- role: Role
-
- # input to the model or output from the model
- content: Content
-
- # output from the model
- tool_calls: List[ToolCall] = field(default_factory=list)
-
- # input to the model
- tool_responses: List[ToolResponse] = field(default_factory=list)
-
-
-@dataclass
-class SamplingParams:
- temperature: float = 0.0
- strategy: str = "greedy"
- top_p: float = 0.95
- top_k: int = 0
-
-
-class PretrainedModel(Enum):
- llama3_8b = "llama3_8b"
- llama3_70b = "llama3_70b"
-
-
-class InstructModel(Enum):
- llama3_8b_chat = "llama3_8b_chat"
- llama3_70b_chat = "llama3_70b_chat"
-
-
-@json_schema_type
-@dataclass
-class CompletionRequest:
- content: Content
- model: PretrainedModel = PretrainedModel.llama3_8b
- sampling_params: SamplingParams = SamplingParams()
- max_tokens: int = 0
- stream: bool = False
- logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class CompletionResponse:
- """Normal completion response."""
-
- content: Content
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class StreamedCompletionResponse:
- """streamed completion response."""
-
- text_delta: str
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@dataclass
-class ChatCompletionRequestCommon:
- message: Message
- message_history: List[Message] = None
- model: InstructModel = InstructModel.llama3_8b_chat
- sampling_params: SamplingParams = SamplingParams()
-
- # zero-shot tool definitions as input to the model
- available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
- default_factory=list
- )
-
-
-@json_schema_type
-@dataclass
-class ChatCompletionRequest(ChatCompletionRequestCommon):
- max_tokens: int = 0
- stream: bool = False
- logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class ChatCompletionResponse:
- """Normal chat completion response."""
-
- content: Content
-
- # note: multiple tool calls can be generated in a single response
- tool_calls: List[ToolCall] = field(default_factory=list)
-
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class StreamedChatCompletionResponse:
- """Streamed chat completion response."""
-
- text_delta: str
- stop_reason: Optional[StopReason] = None
- tool_call: Optional[ToolCall] = None
-
-
-class Inference(Protocol):
-
- def post_completion(
- self,
- request: CompletionRequest,
- ) -> Union[CompletionResponse, StreamedCompletionResponse]: ...
-
- def post_chat_completion(
- self,
- request: ChatCompletionRequest,
- ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemExecuteRequest(ChatCompletionRequestCommon):
- executable_tools: Set[str] = field(default_factory=set)
- stream: bool = False
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemExecuteResponse:
- """Normal chat completion response."""
-
- content: Content
- stop_reason: StopReason
- tool_calls: List[ToolCall] = field(default_factory=list)
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class StreamedAgenticSystemExecuteResponse:
- """Streamed chat completion response."""
-
- text_delta: str
- stop_reason: StopReason
- tool_call: Optional[ToolCall] = None
-
-
-class AgenticSystem(Protocol):
-
- @webmethod(route="/agentic/system/execute")
- def create_agentic_system_execute(
- self,
- request: AgenticSystemExecuteRequest,
- ) -> Union[AgenticSystemExecuteResponse, StreamedAgenticSystemExecuteResponse]: ...
-
-
-class Endpoint(Inference, AgenticSystem): ...
-
-
-if __name__ == "__main__":
- print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
- spec = Specification(
- Endpoint,
- Options(
- server=Server(url="http://llama.meta.com"),
- info=Info(
- title="Llama Stack specification",
- version="0.1",
- description="This is the llama stack",
- ),
- ),
- )
- with open("openapi.yaml", "w", encoding="utf-8") as fp:
- yaml.dump(spec.get_json(), fp, allow_unicode=True)
-
- with open("openapi.html", "w") as fp:
- spec.write_html(fp, pretty_print=True)
diff --git a/source/model_types.py b/source/model_types.py
new file mode 100644
index 000000000..bb7e09971
--- /dev/null
+++ b/source/model_types.py
@@ -0,0 +1,122 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Set, Union
+
+from strong_typing.schema import json_schema_type
+
+
+@json_schema_type(
+ schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
+)
+@dataclass
+class URL:
+ url: str
+
+ def __str__(self) -> str:
+ return self.url
+
+
+@json_schema_type
+@dataclass
+class Attachment:
+ """
+ Attachments are used to refer to external resources, such as images, videos, audio, etc.
+
+ """
+
+ url: URL
+ mime_type: str
+
+
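+# Content may be a plain string, a single Attachment, or an interleaved list of strings and attachments.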
+Content = Union[
+ str,
+ Attachment,
+ List[Union[str, Attachment]],
+]
+
+
+class Role(Enum):
+ system = "system"
+ user = "user"
+ assistant = "assistant"
+ tool = "tool"
+
+
+@dataclass
+class ToolCall:
+ """
+ A tool call is a request to a tool.
+ """
+
+ tool_name: str
+ arguments: Dict[str, Any]
+
+
+@dataclass
+class ToolResponse:
+ tool_name: str
+ response: str
+
+
+@dataclass
+class ToolDefinition:
+ tool_name: str
+ parameters: Dict[str, Any]
+
+
+# TODO: we need to document the parameters for the tool calls
+class BuiltinTool(Enum):
+ """
+ Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
+ """
+
+ web_search = "web_search"
+ math = "math"
+ image_gen = "image_gen"
+ code_interpreter = "code_interpreter"
+
+
+class StopReason(Enum):
+ """
+ Stop reasons are used to indicate why the model stopped generating text.
+ """
+
+ not_stopped = "not_stopped"
+ finished_ok = "finished_ok"
+ max_tokens = "max_tokens"
+
+
+@json_schema_type
+@dataclass
+class Message:
+ role: Role
+
+ # input to the model or output from the model
+ content: Content
+
+ # output from the model
+ tool_calls: List[ToolCall] = field(default_factory=list)
+
+ # input to the model
+ tool_responses: List[ToolResponse] = field(default_factory=list)
+
+
+@dataclass
+class SamplingParams:
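+    # the defaults correspond to greedy decoding; top_k = 0 presumably means "no top-k filtering"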
+ temperature: float = 0.0
+ strategy: str = "greedy"
+ top_p: float = 0.95
+ top_k: int = 0
+
+
+class PretrainedModel(Enum):
+ llama3_8b = "llama3_8b"
+ llama3_70b = "llama3_70b"
+
+
+class InstructModel(Enum):
+ llama3_8b_chat = "llama3_8b_chat"
+ llama3_70b_chat = "llama3_70b_chat"
+
+
+
diff --git a/source/openapi.html b/source/openapi.html
index 328d73edc..3826377cf 100644
--- a/source/openapi.html
+++ b/source/openapi.html
@@ -29,11 +29,41 @@
}
],
"paths": {
- "/agentic/system/execute": {
+ "/agentic_system/create": {
"post": {
"responses": {
"200": {
- "description": "Normal chat completion response. **OR** Streamed chat completion response.",
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/AgenticSystemCreateResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "AgenticSystem"
+ ],
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/AgenticSystemCreateRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/agentic_system/execute": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "non-stream response from the agentic system. **OR** Streamed agent execution response.",
"content": {
"application/json": {
"schema": {
@@ -42,7 +72,7 @@
"$ref": "#/components/schemas/AgenticSystemExecuteResponse"
},
{
- "$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse"
+ "$ref": "#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
}
]
}
@@ -70,7 +100,7 @@
"post": {
"responses": {
"200": {
- "description": "Normal chat completion response. **OR** Streamed chat completion response.",
+ "description": "Normal chat completion response. **OR** Streamed chat completion response. The actual response is a series of such objects.",
"content": {
"application/json": {
"schema": {
@@ -79,7 +109,7 @@
"$ref": "#/components/schemas/ChatCompletionResponse"
},
{
- "$ref": "#/components/schemas/StreamedChatCompletionResponse"
+ "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
}
]
}
@@ -116,7 +146,7 @@
"$ref": "#/components/schemas/CompletionResponse"
},
{
- "$ref": "#/components/schemas/StreamedCompletionResponse"
+ "$ref": "#/components/schemas/CompletionResponseStreamChunk"
}
]
}
@@ -144,52 +174,17 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
- "AgenticSystemExecuteRequest": {
+ "AgenticSystemCreateRequest": {
"type": "object",
"properties": {
- "message": {
- "$ref": "#/components/schemas/Message"
- },
- "message_history": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/Message"
- }
+ "instructions": {
+ "type": "string"
},
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
- ],
- "default": "llama3_8b_chat"
- },
- "sampling_params": {
- "type": "object",
- "properties": {
- "temperature": {
- "type": "number",
- "default": 0.0
- },
- "strategy": {
- "type": "string",
- "default": "greedy"
- },
- "top_p": {
- "type": "number",
- "default": 0.95
- },
- "top_k": {
- "type": "integer",
- "default": 0
- }
- },
- "additionalProperties": false,
- "required": [
- "temperature",
- "strategy",
- "top_p",
- "top_k"
]
},
"available_tools": {
@@ -253,6 +248,45 @@
"type": "string"
},
"uniqueItems": true
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "instructions",
+ "model",
+ "available_tools",
+ "executable_tools"
+ ]
+ },
+ "AgenticSystemCreateResponse": {
+ "type": "object",
+ "properties": {
+ "agent_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "agent_id"
+ ]
+ },
+ "AgenticSystemExecuteRequest": {
+ "type": "object",
+ "properties": {
+ "agent_id": {
+ "type": "string"
+ },
+ "messages": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Message"
+ }
+ },
+ "turn_history": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AgenticSystemTurn"
+ }
},
"stream": {
"type": "boolean",
@@ -261,15 +295,245 @@
},
"additionalProperties": false,
"required": [
- "message",
- "message_history",
- "model",
- "sampling_params",
- "available_tools",
- "executable_tools",
+ "agent_id",
+ "messages",
+ "turn_history",
"stream"
]
},
+ "AgenticSystemTurn": {
+ "type": "object",
+ "properties": {
+ "user_messages": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Message"
+ }
+ },
+ "steps": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "step_type": {
+ "type": "string",
+ "enum": [
+ "model_inference",
+ "tool_execution",
+ "safety_filtering",
+ "memory_retrieval"
+ ],
+ "title": "The type of execution step.",
+ "default": "model_inference"
+ },
+ "text": {
+ "type": "string"
+ },
+ "logprobs": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "step_type",
+ "text"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "step_type": {
+ "type": "string",
+ "enum": [
+ "model_inference",
+ "tool_execution",
+ "safety_filtering",
+ "memory_retrieval"
+ ],
+ "title": "The type of execution step.",
+ "default": "tool_execution"
+ },
+ "tool_calls": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "tool_name": {
+ "type": "string"
+ },
+ "arguments": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "tool_name",
+ "arguments"
+ ],
+ "title": "A tool call is a request to a tool."
+ }
+ },
+ "tool_responses": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "tool_name": {
+ "type": "string"
+ },
+ "response": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "tool_name",
+ "response"
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "step_type",
+ "tool_calls",
+ "tool_responses"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "step_type": {
+ "type": "string",
+ "enum": [
+ "model_inference",
+ "tool_execution",
+ "safety_filtering",
+ "memory_retrieval"
+ ],
+ "title": "The type of execution step.",
+ "default": "safety_filtering"
+ },
+ "violation": {
+ "type": "object",
+ "properties": {
+ "violation_type": {
+ "type": "string"
+ },
+ "details": {
+ "type": "string"
+ },
+ "suggested_user_response": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "violation_type",
+ "details"
+ ]
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "step_type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "step_type": {
+ "type": "string",
+ "enum": [
+ "model_inference",
+ "tool_execution",
+ "safety_filtering",
+ "memory_retrieval"
+ ],
+ "title": "The type of execution step.",
+ "default": "memory_retrieval"
+ },
+ "documents": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "scores": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "step_type",
+ "documents",
+ "scores"
+ ]
+ }
+ ]
+ }
+ },
+ "response_message": {
+ "$ref": "#/components/schemas/Message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "user_messages",
+ "steps",
+ "response_message"
+ ],
+ "title": "A single turn in an interaction with an Agentic System."
+ },
"Attachment": {
"type": "object",
"properties": {
@@ -400,119 +664,21 @@
"AgenticSystemExecuteResponse": {
"type": "object",
"properties": {
- "content": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- },
- {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
- }
- }
- ]
- },
- "stop_reason": {
- "type": "string",
- "enum": [
- "not_stopped",
- "finished_ok",
- "max_tokens"
- ],
- "title": "Stop reasons are used to indicate why the model stopped generating text."
- },
- "tool_calls": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "tool_name": {
- "type": "string"
- },
- "arguments": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "tool_name",
- "arguments"
- ],
- "title": "A tool call is a request to a tool."
- }
- },
- "logprobs": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
+ "turn": {
+ "$ref": "#/components/schemas/AgenticSystemTurn"
}
},
"additionalProperties": false,
"required": [
- "content",
- "stop_reason",
- "tool_calls"
+ "turn"
],
- "title": "Normal chat completion response."
+ "title": "non-stream response from the agentic system."
},
- "StreamedAgenticSystemExecuteResponse": {
+ "AgenticSystemExecuteResponseStreamChunk": {
"type": "object",
"properties": {
- "text_delta": {
- "type": "string"
+ "turn": {
+ "$ref": "#/components/schemas/AgenticSystemTurn"
},
"stop_reason": {
"type": "string",
@@ -522,53 +688,13 @@
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
- },
- "tool_call": {
- "type": "object",
- "properties": {
- "tool_name": {
- "type": "string"
- },
- "arguments": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "tool_name",
- "arguments"
- ],
- "title": "A tool call is a request to a tool."
}
},
"additionalProperties": false,
"required": [
- "text_delta",
- "stop_reason"
+ "turn"
],
- "title": "Streamed chat completion response."
+ "title": "Streamed agent execution response."
},
"ChatCompletionRequest": {
"type": "object",
@@ -576,19 +702,18 @@
"message": {
"$ref": "#/components/schemas/Message"
},
- "message_history": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/Message"
- }
- },
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
- ],
- "default": "llama3_8b_chat"
+ ]
+ },
+ "message_history": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Message"
+ }
},
"sampling_params": {
"type": "object",
@@ -689,8 +814,8 @@
"additionalProperties": false,
"required": [
"message",
- "message_history",
"model",
+ "message_history",
"sampling_params",
"available_tools",
"max_tokens",
@@ -808,7 +933,7 @@
],
"title": "Normal chat completion response."
},
- "StreamedChatCompletionResponse": {
+ "ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
"text_delta": {
@@ -867,7 +992,7 @@
"required": [
"text_delta"
],
- "title": "Streamed chat completion response."
+ "title": "Streamed chat completion response. The actual response is a series of such objects."
},
"CompletionRequest": {
"type": "object",
@@ -900,8 +1025,7 @@
"enum": [
"llama3_8b",
"llama3_70b"
- ],
- "default": "llama3_8b"
+ ]
},
"sampling_params": {
"type": "object",
@@ -1021,7 +1145,7 @@
],
"title": "Normal completion response."
},
- "StreamedCompletionResponse": {
+ "CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"text_delta": {
@@ -1083,10 +1207,22 @@
{
"name": "AgenticSystem"
},
+ {
+ "name": "AgenticSystemCreateRequest",
+ "description": ""
+ },
+ {
+ "name": "AgenticSystemCreateResponse",
+ "description": ""
+ },
{
"name": "AgenticSystemExecuteRequest",
"description": ""
},
+ {
+ "name": "AgenticSystemTurn",
+ "description": "A single turn in an interaction with an Agentic System.\n\n"
+ },
{
"name": "Attachment",
"description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n"
@@ -1101,11 +1237,11 @@
},
{
"name": "AgenticSystemExecuteResponse",
- "description": "Normal chat completion response.\n\n"
+ "description": "non-stream response from the agentic system.\n\n"
},
{
- "name": "StreamedAgenticSystemExecuteResponse",
- "description": "Streamed chat completion response.\n\n"
+ "name": "AgenticSystemExecuteResponseStreamChunk",
+ "description": "Streamed agent execution response.\n\n"
},
{
"name": "ChatCompletionRequest",
@@ -1116,8 +1252,8 @@
"description": "Normal chat completion response.\n\n"
},
{
- "name": "StreamedChatCompletionResponse",
- "description": "Streamed chat completion response.\n\n"
+ "name": "ChatCompletionResponseStreamChunk",
+ "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n"
},
{
"name": "CompletionRequest",
@@ -1128,8 +1264,8 @@
"description": "Normal completion response.\n\n"
},
{
- "name": "StreamedCompletionResponse",
- "description": "streamed completion response.\n\n"
+ "name": "CompletionResponseStreamChunk",
+ "description": "streamed completion response.\n\n"
}
],
"x-tagGroups": [
@@ -1143,17 +1279,20 @@
{
"name": "Types",
"tags": [
+ "AgenticSystemCreateRequest",
+ "AgenticSystemCreateResponse",
"AgenticSystemExecuteRequest",
"AgenticSystemExecuteResponse",
+ "AgenticSystemExecuteResponseStreamChunk",
+ "AgenticSystemTurn",
"Attachment",
"ChatCompletionRequest",
"ChatCompletionResponse",
+ "ChatCompletionResponseStreamChunk",
"CompletionRequest",
"CompletionResponse",
+ "CompletionResponseStreamChunk",
"Message",
- "StreamedAgenticSystemExecuteResponse",
- "StreamedChatCompletionResponse",
- "StreamedCompletionResponse",
"URL"
]
}
diff --git a/source/openapi.yaml b/source/openapi.yaml
index d0b4fc170..45cb2c8c2 100644
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@@ -1,7 +1,7 @@
components:
responses: {}
schemas:
- AgenticSystemExecuteRequest:
+ AgenticSystemCreateRequest:
additionalProperties: false
properties:
available_tools:
@@ -39,73 +39,61 @@ components:
type: string
type: array
uniqueItems: true
- message:
- $ref: '#/components/schemas/Message'
- message_history:
- items:
- $ref: '#/components/schemas/Message'
- type: array
+ instructions:
+ type: string
model:
- default: llama3_8b_chat
enum:
- llama3_8b_chat
- llama3_70b_chat
type: string
- sampling_params:
- additionalProperties: false
- properties:
- strategy:
- default: greedy
- type: string
- temperature:
- default: 0.0
- type: number
- top_k:
- default: 0
- type: integer
- top_p:
- default: 0.95
- type: number
- required:
- - temperature
- - strategy
- - top_p
- - top_k
- type: object
+ required:
+ - instructions
+ - model
+ - available_tools
+ - executable_tools
+ type: object
+ AgenticSystemCreateResponse:
+ additionalProperties: false
+ properties:
+ agent_id:
+ type: string
+ required:
+ - agent_id
+ type: object
+ AgenticSystemExecuteRequest:
+ additionalProperties: false
+ properties:
+ agent_id:
+ type: string
+ messages:
+ items:
+ $ref: '#/components/schemas/Message'
+ type: array
stream:
default: false
type: boolean
+ turn_history:
+ items:
+ $ref: '#/components/schemas/AgenticSystemTurn'
+ type: array
required:
- - message
- - message_history
- - model
- - sampling_params
- - available_tools
- - executable_tools
+ - agent_id
+ - messages
+ - turn_history
- stream
type: object
AgenticSystemExecuteResponse:
additionalProperties: false
properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
+ turn:
+ $ref: '#/components/schemas/AgenticSystemTurn'
+ required:
+ - turn
+ title: non-stream response from the agentic system.
+ type: object
+ AgenticSystemExecuteResponseStreamChunk:
+ additionalProperties: false
+ properties:
stop_reason:
enum:
- not_stopped
@@ -114,33 +102,159 @@ components:
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
- tool_calls:
+ turn:
+ $ref: '#/components/schemas/AgenticSystemTurn'
+ required:
+ - turn
+ title: Streamed agent execution response.
+ type: object
+ AgenticSystemTurn:
+ additionalProperties: false
+ properties:
+ response_message:
+ $ref: '#/components/schemas/Message'
+ steps:
items:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
+ oneOf:
+ - additionalProperties: false
+ properties:
+ logprobs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ step_type:
+ default: model_inference
+ enum:
+ - model_inference
+ - tool_execution
+ - safety_filtering
+ - memory_retrieval
+ title: The type of execution step.
+ type: string
+ text:
+ type: string
+ required:
+ - step_type
+ - text
+ type: object
+ - additionalProperties: false
+ properties:
+ step_type:
+ default: tool_execution
+ enum:
+ - model_inference
+ - tool_execution
+ - safety_filtering
+ - memory_retrieval
+ title: The type of execution step.
+ type: string
+ tool_calls:
+ items:
+ additionalProperties: false
+ properties:
+ arguments:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - arguments
+ title: A tool call is a request to a tool.
+ type: object
+ type: array
+ tool_responses:
+ items:
+ additionalProperties: false
+ properties:
+ response:
+ type: string
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - response
+ type: object
+ type: array
+ required:
+ - step_type
+ - tool_calls
+ - tool_responses
+ type: object
+ - additionalProperties: false
+ properties:
+ step_type:
+ default: safety_filtering
+ enum:
+ - model_inference
+ - tool_execution
+ - safety_filtering
+ - memory_retrieval
+ title: The type of execution step.
+ type: string
+ violation:
+ additionalProperties: false
+ properties:
+ details:
+ type: string
+ suggested_user_response:
+ type: string
+ violation_type:
+ type: string
+ required:
+ - violation_type
+ - details
+ type: object
+ required:
+ - step_type
+ type: object
+ - additionalProperties: false
+ properties:
+ documents:
+ items:
+ type: string
+ type: array
+ scores:
+ items:
+ type: number
+ type: array
+ step_type:
+ default: memory_retrieval
+ enum:
+ - model_inference
+ - tool_execution
+ - safety_filtering
+ - memory_retrieval
+ title: The type of execution step.
+ type: string
+ required:
+ - step_type
+ - documents
+ - scores
+ type: object
+ type: array
+ user_messages:
+ items:
+ $ref: '#/components/schemas/Message'
type: array
required:
- - content
- - stop_reason
- - tool_calls
- title: Normal chat completion response.
+ - user_messages
+ - steps
+ - response_message
+ title: A single turn in an interaction with an Agentic System.
type: object
Attachment:
additionalProperties: false
@@ -201,7 +315,6 @@ components:
$ref: '#/components/schemas/Message'
type: array
model:
- default: llama3_8b_chat
enum:
- llama3_8b_chat
- llama3_70b_chat
@@ -232,8 +345,8 @@ components:
type: boolean
required:
- message
- - message_history
- model
+ - message_history
- sampling_params
- available_tools
- max_tokens
@@ -297,6 +410,44 @@ components:
- tool_calls
title: Normal chat completion response.
type: object
+ ChatCompletionResponseStreamChunk:
+ additionalProperties: false
+ properties:
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ text_delta:
+ type: string
+ tool_call:
+ additionalProperties: false
+ properties:
+ arguments:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - arguments
+ title: A tool call is a request to a tool.
+ type: object
+ required:
+ - text_delta
+ title: Streamed chat completion response. The actual response is a series of
+ such objects.
+ type: object
CompletionRequest:
additionalProperties: false
properties:
@@ -316,7 +467,6 @@ components:
default: 0
type: integer
model:
- default: llama3_8b
enum:
- llama3_8b
- llama3_70b
@@ -387,6 +537,33 @@ components:
- content
title: Normal completion response.
type: object
+ CompletionResponseStreamChunk:
+ additionalProperties: false
+ properties:
+ logprobs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ text_delta:
+ type: string
+ required:
+ - text_delta
+ title: streamed completion response.
+ type: object
Message:
additionalProperties: false
properties:
@@ -447,108 +624,6 @@ components:
- tool_calls
- tool_responses
type: object
- StreamedAgenticSystemExecuteResponse:
- additionalProperties: false
- properties:
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- text_delta:
- type: string
- tool_call:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- required:
- - text_delta
- - stop_reason
- title: Streamed chat completion response.
- type: object
- StreamedChatCompletionResponse:
- additionalProperties: false
- properties:
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- text_delta:
- type: string
- tool_call:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- required:
- - text_delta
- title: Streamed chat completion response.
- type: object
- StreamedCompletionResponse:
- additionalProperties: false
- properties:
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- text_delta:
- type: string
- required:
- - text_delta
- title: streamed completion response.
- type: object
URL:
format: uri
pattern: ^(https?://|file://|data:)
@@ -560,7 +635,25 @@ info:
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
openapi: 3.1.0
paths:
- /agentic/system/execute:
+ /agentic_system/create:
+ post:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/AgenticSystemCreateRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/AgenticSystemCreateResponse'
+ description: OK
+ tags:
+ - AgenticSystem
+ /agentic_system/execute:
post:
parameters: []
requestBody:
@@ -576,9 +669,9 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/AgenticSystemExecuteResponse'
- - $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse'
- description: Normal chat completion response. **OR** Streamed chat completion
- response.
+ - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk'
+ description: non-stream response from the agentic system. **OR** Streamed
+ agent execution response.
tags:
- AgenticSystem
/chat_completion:
@@ -597,9 +690,9 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/ChatCompletionResponse'
- - $ref: '#/components/schemas/StreamedChatCompletionResponse'
+ - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion
- response.
+ response. The actual response is a series of such objects.
tags:
- Inference
/completion:
@@ -618,7 +711,7 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/CompletionResponse'
- - $ref: '#/components/schemas/StreamedCompletionResponse'
+ - $ref: '#/components/schemas/CompletionResponseStreamChunk'
description: Normal completion response. **OR** streamed completion response.
tags:
- Inference
@@ -629,9 +722,20 @@ servers:
tags:
- name: Inference
- name: AgenticSystem
+- description:
+ name: AgenticSystemCreateRequest
+- description:
+ name: AgenticSystemCreateResponse
- description:
name: AgenticSystemExecuteRequest
+- description: 'A single turn in an interaction with an Agentic System.
+
+
+ '
+ name: AgenticSystemTurn
- description: 'Attachments are used to refer to external resources, such as images,
videos, audio, etc.
@@ -642,18 +746,18 @@ tags:
name: Message
- description:
name: URL
-- description: 'Normal chat completion response.
+- description: 'non-stream response from the agentic system.
'
name: AgenticSystemExecuteResponse
-- description: 'Streamed chat completion response.
+- description: 'Streamed agent execution response.
- '
- name: StreamedAgenticSystemExecuteResponse
+ name: AgenticSystemExecuteResponseStreamChunk
- description:
name: ChatCompletionRequest
@@ -662,12 +766,13 @@ tags:
'
name: ChatCompletionResponse
-- description: 'Streamed chat completion response.
+- description: 'Streamed chat completion response. The actual response is a series
+ of such objects.
- '
- name: StreamedChatCompletionResponse
+ name: ChatCompletionResponseStreamChunk
- description:
name: CompletionRequest
@@ -679,9 +784,9 @@ tags:
- description: 'streamed completion response.
- '
- name: StreamedCompletionResponse
+ name: CompletionResponseStreamChunk
x-tagGroups:
- name: Operations
tags:
@@ -689,15 +794,18 @@ x-tagGroups:
- Inference
- name: Types
tags:
+ - AgenticSystemCreateRequest
+ - AgenticSystemCreateResponse
- AgenticSystemExecuteRequest
- AgenticSystemExecuteResponse
+ - AgenticSystemExecuteResponseStreamChunk
+ - AgenticSystemTurn
- Attachment
- ChatCompletionRequest
- ChatCompletionResponse
+ - ChatCompletionResponseStreamChunk
- CompletionRequest
- CompletionResponse
+ - CompletionResponseStreamChunk
- Message
- - StreamedAgenticSystemExecuteResponse
- - StreamedChatCompletionResponse
- - StreamedCompletionResponse
- URL
diff --git a/source/run.sh b/source/run.sh
index 980d979d2..b30929cfb 100644
--- a/source/run.sh
+++ b/source/run.sh
@@ -1,3 +1,3 @@
#!/bin/bash
-PYTHONPATH=. python3 defn.py
+PYTHONPATH=. python3 api_definitions.py