more work on agent definitions

Ashwin Bharambe 2024-07-09 13:53:09 -07:00
parent 6e4586ba7a
commit 97f9b18aca
8 changed files with 1079 additions and 695 deletions

.gitignore vendored Normal file

@@ -0,0 +1 @@
__pycache__

source/agentic_system_types.py Normal file

@@ -0,0 +1,88 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Union

from model_types import (
    BuiltinTool,
    Content,
    InstructModel,
    Message,
    PretrainedModel,
    SamplingParams,
    StopReason,
    ToolCall,
    ToolDefinition,
    ToolResponse,
)

from strong_typing.schema import json_schema_type


class ExecutionStepType(Enum):
    """The type of execution step."""

    model_inference = "model_inference"
    tool_execution = "tool_execution"
    safety_filtering = "safety_filtering"
    memory_retrieval = "memory_retrieval"


@dataclass
class ExecutionStepBase:
    """An agentic system turn can consist of one or more such execution steps."""

    step_type: ExecutionStepType


@dataclass
class ModelInferenceStep(ExecutionStepBase):
    step_type = ExecutionStepType.model_inference
    text: str
    logprobs: Optional[Dict[str, Any]] = None


@dataclass
class ToolExecutionStep(ExecutionStepBase):
    step_type = ExecutionStepType.tool_execution
    # we could be calling multiple tools in a single step (in parallel)
    tool_calls: List[ToolCall]
    tool_responses: List[ToolResponse]


@dataclass
class SafetyViolation:
    violation_type: str
    details: str
    suggested_user_response: Optional[str] = None


@dataclass
class SafetyFilteringStep(ExecutionStepBase):
    step_type = ExecutionStepType.safety_filtering
    violation: Optional[SafetyViolation] = None


@dataclass
class MemoryRetrievalStep(ExecutionStepBase):
    step_type = ExecutionStepType.memory_retrieval
    documents: List[str]
    scores: List[float]


ExecutionStep = Union[
    ModelInferenceStep,
    ToolExecutionStep,
    SafetyFilteringStep,
    MemoryRetrievalStep,
]


@json_schema_type
@dataclass
class AgenticSystemTurn:
    """A single turn in an interaction with an Agentic System."""

    user_messages: List[Message]
    steps: List[ExecutionStep]
    response_message: Message

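For orientation, a turn assembled from these dataclasses could look like the sketch below. It is illustrative only: the query, tool output, and reply text are invented, and it assumes the modules in this commit are importable (e.g. run from source/ with the strong_typing dependency installed).

from agentic_system_types import (
    AgenticSystemTurn,
    ExecutionStepType,
    ModelInferenceStep,
    ToolExecutionStep,
)
from model_types import Message, Role, ToolCall, ToolResponse

# A single turn: the user asks a question, the system runs one web_search
# tool call and then produces a final model response.
turn = AgenticSystemTurn(
    user_messages=[Message(role=Role.user, content="What is the weather in SF?")],
    steps=[
        ToolExecutionStep(
            step_type=ExecutionStepType.tool_execution,
            tool_calls=[ToolCall(tool_name="web_search", arguments={"query": "SF weather"})],
            tool_responses=[ToolResponse(tool_name="web_search", response="Sunny, 21C")],
        ),
        ModelInferenceStep(
            step_type=ExecutionStepType.model_inference,
            text="It is currently sunny and about 21C in San Francisco.",
        ),
    ],
    response_message=Message(
        role=Role.assistant,
        content="It is currently sunny and about 21C in San Francisco.",
    ),
)
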
source/api_definitions.py Normal file

@@ -0,0 +1,197 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Set, Union

import yaml

from model_types import (
    BuiltinTool,
    Content,
    InstructModel,
    Message,
    PretrainedModel,
    SamplingParams,
    StopReason,
    ToolCall,
    ToolDefinition,
    ToolResponse,
)

from agentic_system_types import (
    AgenticSystemTurn,
)

from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type


@json_schema_type
@dataclass
class CompletionRequest:
    content: Content
    model: PretrainedModel
    sampling_params: SamplingParams = SamplingParams()
    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False


@json_schema_type
@dataclass
class CompletionResponse:
    """Normal completion response."""

    content: Content
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class CompletionResponseStreamChunk:
    """streamed completion response."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class ChatCompletionRequest:
    message: Message
    model: InstructModel
    message_history: List[Message] = None
    sampling_params: SamplingParams = SamplingParams()
    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )
    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False


@json_schema_type
@dataclass
class ChatCompletionResponse:
    """Normal chat completion response."""

    content: Content
    # note: multiple tool calls can be generated in a single response
    tool_calls: List[ToolCall] = field(default_factory=list)
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class ChatCompletionResponseStreamChunk:
    """Streamed chat completion response. The actual response is a series of such objects."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    tool_call: Optional[ToolCall] = None


class Inference(Protocol):
    def post_completion(
        self,
        request: CompletionRequest,
    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...

    def post_chat_completion(
        self,
        request: ChatCompletionRequest,
    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...


@dataclass
class AgenticSystemCreateRequest:
    instructions: str
    model: InstructModel
    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )
    executable_tools: Set[str] = field(default_factory=set)


@json_schema_type
@dataclass
class AgenticSystemCreateResponse:
    agent_id: str


@json_schema_type
@dataclass
class AgenticSystemExecuteRequest:
    agent_id: str
    messages: List[Message]
    turn_history: List[AgenticSystemTurn] = None
    stream: bool = False


@json_schema_type
@dataclass
class AgenticSystemExecuteResponse:
    """non-stream response from the agentic system."""

    turn: AgenticSystemTurn


@json_schema_type
@dataclass
class AgenticSystemExecuteResponseStreamChunk:
    """Streamed agent execution response."""

    # TODO: make things streamable
    turn: AgenticSystemTurn
    stop_reason: Optional[StopReason] = None


class AgenticSystem(Protocol):
    @webmethod(route="/agentic_system/create")
    def create_agentic_system(
        self,
        request: AgenticSystemCreateRequest,
    ) -> AgenticSystemCreateResponse: ...

    @webmethod(route="/agentic_system/execute")
    def create_agentic_system_execute(
        self,
        request: AgenticSystemExecuteRequest,
    ) -> Union[
        AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk
    ]: ...


class LlamaStackEndpoints(Inference, AgenticSystem): ...


if __name__ == "__main__":
    print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")

    spec = Specification(
        LlamaStackEndpoints,
        Options(
            server=Server(url="http://llama.meta.com"),
            info=Info(
                title="Llama Stack specification",
                version="0.1",
                description="This is the llama stack",
            ),
        ),
    )

    with open("openapi.yaml", "w", encoding="utf-8") as fp:
        yaml.dump(spec.get_json(), fp, allow_unicode=True)

    with open("openapi.html", "w") as fp:
        spec.write_html(fp, pretty_print=True)

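A rough sketch of how a caller might exercise the AgenticSystem endpoints defined above. The InMemoryAgenticSystem class is invented for illustration and simply echoes the last user message; only the request/response dataclasses and the Protocol come from api_definitions.py, and the sketch assumes a Python/dependency setup in which these modules import cleanly (strong_typing and pyopenapi installed).

import uuid
from typing import Dict

from agentic_system_types import AgenticSystemTurn
from api_definitions import (
    AgenticSystemCreateRequest,
    AgenticSystemCreateResponse,
    AgenticSystemExecuteRequest,
    AgenticSystemExecuteResponse,
)
from model_types import InstructModel, Message, Role


class InMemoryAgenticSystem:
    """Toy stand-in that structurally satisfies the AgenticSystem Protocol."""

    def __init__(self) -> None:
        self._instructions: Dict[str, str] = {}

    def create_agentic_system(
        self, request: AgenticSystemCreateRequest
    ) -> AgenticSystemCreateResponse:
        agent_id = uuid.uuid4().hex
        self._instructions[agent_id] = request.instructions
        return AgenticSystemCreateResponse(agent_id=agent_id)

    def create_agentic_system_execute(
        self, request: AgenticSystemExecuteRequest
    ) -> AgenticSystemExecuteResponse:
        # Echo the last user message back as a single, step-less turn.
        reply = Message(role=Role.assistant, content=request.messages[-1].content)
        turn = AgenticSystemTurn(
            user_messages=request.messages,
            steps=[],
            response_message=reply,
        )
        return AgenticSystemExecuteResponse(turn=turn)


system = InMemoryAgenticSystem()
created = system.create_agentic_system(
    AgenticSystemCreateRequest(
        instructions="You are a helpful assistant.",
        model=InstructModel.llama3_8b_chat,
    )
)
result = system.create_agentic_system_execute(
    AgenticSystemExecuteRequest(
        agent_id=created.agent_id,
        messages=[Message(role=Role.user, content="hello")],
    )
)
print(result.turn.response_message.content)
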
source/defn.py

@@ -1,271 +0,0 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Set, Union

import yaml

from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type


@json_schema_type(
    schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
)
@dataclass
class URL:
    url: str

    def __str__(self) -> str:
        return self.url


@json_schema_type
@dataclass
class Attachment:
    """
    Attachments are used to refer to external resources, such as images, videos, audio, etc.
    """

    url: URL
    mime_type: str


Content = Union[
    str,
    Attachment,
    List[Union[str, Attachment]],
]


class Role(Enum):
    system = "system"
    user = "user"
    assistant = "assistant"
    tool = "tool"


@dataclass
class ToolCall:
    """
    A tool call is a request to a tool.
    """

    tool_name: str
    arguments: Dict[str, Any]


@dataclass
class ToolResponse:
    tool_name: str
    response: str


@dataclass
class ToolDefinition:
    tool_name: str
    parameters: Dict[str, Any]
    # TODO: we need to document the parameters for the tool calls


class BuiltinTool(Enum):
    """
    Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
    """

    web_search = "web_search"
    math = "math"
    image_gen = "image_gen"
    code_interpreter = "code_interpreter"


class StopReason(Enum):
    """
    Stop reasons are used to indicate why the model stopped generating text.
    """

    not_stopped = "not_stopped"
    finished_ok = "finished_ok"
    max_tokens = "max_tokens"


@json_schema_type
@dataclass
class Message:
    role: Role
    # input to the model or output from the model
    content: Content
    # output from the model
    tool_calls: List[ToolCall] = field(default_factory=list)
    # input to the model
    tool_responses: List[ToolResponse] = field(default_factory=list)


@dataclass
class SamplingParams:
    temperature: float = 0.0
    strategy: str = "greedy"
    top_p: float = 0.95
    top_k: int = 0


class PretrainedModel(Enum):
    llama3_8b = "llama3_8b"
    llama3_70b = "llama3_70b"


class InstructModel(Enum):
    llama3_8b_chat = "llama3_8b_chat"
    llama3_70b_chat = "llama3_70b_chat"


@json_schema_type
@dataclass
class CompletionRequest:
    content: Content
    model: PretrainedModel = PretrainedModel.llama3_8b
    sampling_params: SamplingParams = SamplingParams()
    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False


@json_schema_type
@dataclass
class CompletionResponse:
    """Normal completion response."""

    content: Content
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class StreamedCompletionResponse:
    """streamed completion response."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@dataclass
class ChatCompletionRequestCommon:
    message: Message
    message_history: List[Message] = None
    model: InstructModel = InstructModel.llama3_8b_chat
    sampling_params: SamplingParams = SamplingParams()
    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )


@json_schema_type
@dataclass
class ChatCompletionRequest(ChatCompletionRequestCommon):
    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False


@json_schema_type
@dataclass
class ChatCompletionResponse:
    """Normal chat completion response."""

    content: Content
    # note: multiple tool calls can be generated in a single response
    tool_calls: List[ToolCall] = field(default_factory=list)
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class StreamedChatCompletionResponse:
    """Streamed chat completion response."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    tool_call: Optional[ToolCall] = None


class Inference(Protocol):
    def post_completion(
        self,
        request: CompletionRequest,
    ) -> Union[CompletionResponse, StreamedCompletionResponse]: ...

    def post_chat_completion(
        self,
        request: ChatCompletionRequest,
    ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...


@json_schema_type
@dataclass
class AgenticSystemExecuteRequest(ChatCompletionRequestCommon):
    executable_tools: Set[str] = field(default_factory=set)
    stream: bool = False


@json_schema_type
@dataclass
class AgenticSystemExecuteResponse:
    """Normal chat completion response."""

    content: Content
    stop_reason: StopReason
    tool_calls: List[ToolCall] = field(default_factory=list)
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class StreamedAgenticSystemExecuteResponse:
    """Streamed chat completion response."""

    text_delta: str
    stop_reason: StopReason
    tool_call: Optional[ToolCall] = None


class AgenticSystem(Protocol):
    @webmethod(route="/agentic/system/execute")
    def create_agentic_system_execute(
        self,
        request: AgenticSystemExecuteRequest,
    ) -> Union[AgenticSystemExecuteResponse, StreamedAgenticSystemExecuteResponse]: ...


class Endpoint(Inference, AgenticSystem): ...


if __name__ == "__main__":
    print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")

    spec = Specification(
        Endpoint,
        Options(
            server=Server(url="http://llama.meta.com"),
            info=Info(
                title="Llama Stack specification",
                version="0.1",
                description="This is the llama stack",
            ),
        ),
    )

    with open("openapi.yaml", "w", encoding="utf-8") as fp:
        yaml.dump(spec.get_json(), fp, allow_unicode=True)

    with open("openapi.html", "w") as fp:
        spec.write_html(fp, pretty_print=True)

source/model_types.py Normal file

@@ -0,0 +1,122 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Union

from strong_typing.schema import json_schema_type


@json_schema_type(
    schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
)
@dataclass
class URL:
    url: str

    def __str__(self) -> str:
        return self.url


@json_schema_type
@dataclass
class Attachment:
    """
    Attachments are used to refer to external resources, such as images, videos, audio, etc.
    """

    url: URL
    mime_type: str


Content = Union[
    str,
    Attachment,
    List[Union[str, Attachment]],
]


class Role(Enum):
    system = "system"
    user = "user"
    assistant = "assistant"
    tool = "tool"


@dataclass
class ToolCall:
    """
    A tool call is a request to a tool.
    """

    tool_name: str
    arguments: Dict[str, Any]


@dataclass
class ToolResponse:
    tool_name: str
    response: str


@dataclass
class ToolDefinition:
    tool_name: str
    parameters: Dict[str, Any]
    # TODO: we need to document the parameters for the tool calls


class BuiltinTool(Enum):
    """
    Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
    """

    web_search = "web_search"
    math = "math"
    image_gen = "image_gen"
    code_interpreter = "code_interpreter"


class StopReason(Enum):
    """
    Stop reasons are used to indicate why the model stopped generating text.
    """

    not_stopped = "not_stopped"
    finished_ok = "finished_ok"
    max_tokens = "max_tokens"


@json_schema_type
@dataclass
class Message:
    role: Role
    # input to the model or output from the model
    content: Content
    # output from the model
    tool_calls: List[ToolCall] = field(default_factory=list)
    # input to the model
    tool_responses: List[ToolResponse] = field(default_factory=list)


@dataclass
class SamplingParams:
    temperature: float = 0.0
    strategy: str = "greedy"
    top_p: float = 0.95
    top_k: int = 0


class PretrainedModel(Enum):
    llama3_8b = "llama3_8b"
    llama3_70b = "llama3_70b"


class InstructModel(Enum):
    llama3_8b_chat = "llama3_8b_chat"
    llama3_70b_chat = "llama3_70b_chat"

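A small illustration of how the Content union and Message compose; the URL and text values are made up, and only the types defined above are used.

from model_types import Attachment, Message, Role, ToolCall, URL

# Content can be a plain string, an Attachment, or a mixed list of both.
chart = Attachment(url=URL(url="https://example.com/chart.png"), mime_type="image/png")

user_msg = Message(
    role=Role.user,
    content=["What does this chart show?", chart],
)

# A model turn that asks for a builtin tool instead of answering directly;
# tool_calls is model output, tool_responses would be input on a later turn.
assistant_msg = Message(
    role=Role.assistant,
    content="",
    tool_calls=[ToolCall(tool_name="web_search", arguments={"query": "reading line charts"})],
)
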
openapi.json

@@ -29,11 +29,41 @@
} }
], ],
"paths": { "paths": {
"/agentic/system/execute": { "/agentic_system/create": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
"description": "Normal chat completion response. **OR** Streamed chat completion response.", "description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AgenticSystemCreateResponse"
}
}
}
}
},
"tags": [
"AgenticSystem"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AgenticSystemCreateRequest"
}
}
},
"required": true
}
}
},
"/agentic_system/execute": {
"post": {
"responses": {
"200": {
"description": "non-stream response from the agentic system. **OR** Streamed agent execution response.",
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
@ -42,7 +72,7 @@
"$ref": "#/components/schemas/AgenticSystemExecuteResponse" "$ref": "#/components/schemas/AgenticSystemExecuteResponse"
}, },
{ {
"$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse" "$ref": "#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
} }
] ]
} }
@ -70,7 +100,7 @@
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
"description": "Normal chat completion response. **OR** Streamed chat completion response.", "description": "Normal chat completion response. **OR** Streamed chat completion response. The actual response is a series of such objects.",
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
@ -79,7 +109,7 @@
"$ref": "#/components/schemas/ChatCompletionResponse" "$ref": "#/components/schemas/ChatCompletionResponse"
}, },
{ {
"$ref": "#/components/schemas/StreamedChatCompletionResponse" "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
} }
] ]
} }
@ -116,7 +146,7 @@
"$ref": "#/components/schemas/CompletionResponse" "$ref": "#/components/schemas/CompletionResponse"
}, },
{ {
"$ref": "#/components/schemas/StreamedCompletionResponse" "$ref": "#/components/schemas/CompletionResponseStreamChunk"
} }
] ]
} }
@ -144,52 +174,17 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": { "components": {
"schemas": { "schemas": {
"AgenticSystemExecuteRequest": { "AgenticSystemCreateRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"message": { "instructions": {
"$ref": "#/components/schemas/Message" "type": "string"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
}, },
"model": { "model": {
"type": "string", "type": "string",
"enum": [ "enum": [
"llama3_8b_chat", "llama3_8b_chat",
"llama3_70b_chat" "llama3_70b_chat"
],
"default": "llama3_8b_chat"
},
"sampling_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number",
"default": 0.0
},
"strategy": {
"type": "string",
"default": "greedy"
},
"top_p": {
"type": "number",
"default": 0.95
},
"top_k": {
"type": "integer",
"default": 0
}
},
"additionalProperties": false,
"required": [
"temperature",
"strategy",
"top_p",
"top_k"
] ]
}, },
"available_tools": { "available_tools": {
@ -253,6 +248,45 @@
"type": "string" "type": "string"
}, },
"uniqueItems": true "uniqueItems": true
}
},
"additionalProperties": false,
"required": [
"instructions",
"model",
"available_tools",
"executable_tools"
]
},
"AgenticSystemCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
]
},
"AgenticSystemExecuteRequest": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"turn_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgenticSystemTurn"
}
}, },
"stream": { "stream": {
"type": "boolean", "type": "boolean",
@ -261,15 +295,245 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"message", "agent_id",
"message_history", "messages",
"model", "turn_history",
"sampling_params",
"available_tools",
"executable_tools",
"stream" "stream"
] ]
}, },
"AgenticSystemTurn": {
"type": "object",
"properties": {
"user_messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"steps": {
"type": "array",
"items": {
"oneOf": [
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "model_inference"
},
"text": {
"type": "string"
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"text"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "tool_execution"
},
"tool_calls": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"tool_responses": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"response": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"tool_name",
"response"
]
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"tool_calls",
"tool_responses"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "safety_filtering"
},
"violation": {
"type": "object",
"properties": {
"violation_type": {
"type": "string"
},
"details": {
"type": "string"
},
"suggested_user_response": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"violation_type",
"details"
]
}
},
"additionalProperties": false,
"required": [
"step_type"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "memory_retrieval"
},
"documents": {
"type": "array",
"items": {
"type": "string"
}
},
"scores": {
"type": "array",
"items": {
"type": "number"
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"documents",
"scores"
]
}
]
}
},
"response_message": {
"$ref": "#/components/schemas/Message"
}
},
"additionalProperties": false,
"required": [
"user_messages",
"steps",
"response_message"
],
"title": "A single turn in an interaction with an Agentic System."
},
"Attachment": { "Attachment": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -400,119 +664,21 @@
"AgenticSystemExecuteResponse": { "AgenticSystemExecuteResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"content": { "turn": {
"oneOf": [ "$ref": "#/components/schemas/AgenticSystemTurn"
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
}
]
}
}
]
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"tool_calls": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "turn"
"stop_reason",
"tool_calls"
], ],
"title": "Normal chat completion response." "title": "non-stream response from the agentic system."
}, },
"StreamedAgenticSystemExecuteResponse": { "AgenticSystemExecuteResponseStreamChunk": {
"type": "object", "type": "object",
"properties": { "properties": {
"text_delta": { "turn": {
"type": "string" "$ref": "#/components/schemas/AgenticSystemTurn"
}, },
"stop_reason": { "stop_reason": {
"type": "string", "type": "string",
@ -522,53 +688,13 @@
"max_tokens" "max_tokens"
], ],
"title": "Stop reasons are used to indicate why the model stopped generating text." "title": "Stop reasons are used to indicate why the model stopped generating text."
},
"tool_call": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"text_delta", "turn"
"stop_reason"
], ],
"title": "Streamed chat completion response." "title": "Streamed agent execution response."
}, },
"ChatCompletionRequest": { "ChatCompletionRequest": {
"type": "object", "type": "object",
@ -576,19 +702,18 @@
"message": { "message": {
"$ref": "#/components/schemas/Message" "$ref": "#/components/schemas/Message"
}, },
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"model": { "model": {
"type": "string", "type": "string",
"enum": [ "enum": [
"llama3_8b_chat", "llama3_8b_chat",
"llama3_70b_chat" "llama3_70b_chat"
], ]
"default": "llama3_8b_chat" },
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
}, },
"sampling_params": { "sampling_params": {
"type": "object", "type": "object",
@ -689,8 +814,8 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"message", "message",
"message_history",
"model", "model",
"message_history",
"sampling_params", "sampling_params",
"available_tools", "available_tools",
"max_tokens", "max_tokens",
@ -808,7 +933,7 @@
], ],
"title": "Normal chat completion response." "title": "Normal chat completion response."
}, },
"StreamedChatCompletionResponse": { "ChatCompletionResponseStreamChunk": {
"type": "object", "type": "object",
"properties": { "properties": {
"text_delta": { "text_delta": {
@ -867,7 +992,7 @@
"required": [ "required": [
"text_delta" "text_delta"
], ],
"title": "Streamed chat completion response." "title": "Streamed chat completion response. The actual response is a series of such objects."
}, },
"CompletionRequest": { "CompletionRequest": {
"type": "object", "type": "object",
@ -900,8 +1025,7 @@
"enum": [ "enum": [
"llama3_8b", "llama3_8b",
"llama3_70b" "llama3_70b"
], ]
"default": "llama3_8b"
}, },
"sampling_params": { "sampling_params": {
"type": "object", "type": "object",
@ -1021,7 +1145,7 @@
], ],
"title": "Normal completion response." "title": "Normal completion response."
}, },
"StreamedCompletionResponse": { "CompletionResponseStreamChunk": {
"type": "object", "type": "object",
"properties": { "properties": {
"text_delta": { "text_delta": {
@ -1083,10 +1207,22 @@
{ {
"name": "AgenticSystem" "name": "AgenticSystem"
}, },
{
"name": "AgenticSystemCreateRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateRequest\" />"
},
{
"name": "AgenticSystemCreateResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateResponse\" />"
},
{ {
"name": "AgenticSystemExecuteRequest", "name": "AgenticSystemExecuteRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteRequest\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteRequest\" />"
}, },
{
"name": "AgenticSystemTurn",
"description": "A single turn in an interaction with an Agentic System.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemTurn\" />"
},
{ {
"name": "Attachment", "name": "Attachment",
"description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />" "description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
@ -1101,11 +1237,11 @@
}, },
{ {
"name": "AgenticSystemExecuteResponse", "name": "AgenticSystemExecuteResponse",
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponse\" />" "description": "non-stream response from the agentic system.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponse\" />"
}, },
{ {
"name": "StreamedAgenticSystemExecuteResponse", "name": "AgenticSystemExecuteResponseStreamChunk",
"description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedAgenticSystemExecuteResponse\" />" "description": "Streamed agent execution response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponseStreamChunk\" />"
}, },
{ {
"name": "ChatCompletionRequest", "name": "ChatCompletionRequest",
@ -1116,8 +1252,8 @@
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />" "description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
}, },
{ {
"name": "StreamedChatCompletionResponse", "name": "ChatCompletionResponseStreamChunk",
"description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedChatCompletionResponse\" />" "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
}, },
{ {
"name": "CompletionRequest", "name": "CompletionRequest",
@ -1128,8 +1264,8 @@
"description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />" "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
}, },
{ {
"name": "StreamedCompletionResponse", "name": "CompletionResponseStreamChunk",
"description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedCompletionResponse\" />" "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
} }
], ],
"x-tagGroups": [ "x-tagGroups": [
@ -1143,17 +1279,20 @@
{ {
"name": "Types", "name": "Types",
"tags": [ "tags": [
"AgenticSystemCreateRequest",
"AgenticSystemCreateResponse",
"AgenticSystemExecuteRequest", "AgenticSystemExecuteRequest",
"AgenticSystemExecuteResponse", "AgenticSystemExecuteResponse",
"AgenticSystemExecuteResponseStreamChunk",
"AgenticSystemTurn",
"Attachment", "Attachment",
"ChatCompletionRequest", "ChatCompletionRequest",
"ChatCompletionResponse", "ChatCompletionResponse",
"ChatCompletionResponseStreamChunk",
"CompletionRequest", "CompletionRequest",
"CompletionResponse", "CompletionResponse",
"CompletionResponseStreamChunk",
"Message", "Message",
"StreamedAgenticSystemExecuteResponse",
"StreamedChatCompletionResponse",
"StreamedCompletionResponse",
"URL" "URL"
] ]
} }

openapi.yaml

@@ -1,7 +1,7 @@
components: components:
responses: {} responses: {}
schemas: schemas:
AgenticSystemExecuteRequest: AgenticSystemCreateRequest:
additionalProperties: false additionalProperties: false
properties: properties:
available_tools: available_tools:
@ -39,73 +39,61 @@ components:
type: string type: string
type: array type: array
uniqueItems: true uniqueItems: true
message: instructions:
$ref: '#/components/schemas/Message' type: string
message_history:
items:
$ref: '#/components/schemas/Message'
type: array
model: model:
default: llama3_8b_chat
enum: enum:
- llama3_8b_chat - llama3_8b_chat
- llama3_70b_chat - llama3_70b_chat
type: string type: string
sampling_params: required:
additionalProperties: false - instructions
properties: - model
strategy: - available_tools
default: greedy - executable_tools
type: string type: object
temperature: AgenticSystemCreateResponse:
default: 0.0 additionalProperties: false
type: number properties:
top_k: agent_id:
default: 0 type: string
type: integer required:
top_p: - agent_id
default: 0.95 type: object
type: number AgenticSystemExecuteRequest:
required: additionalProperties: false
- temperature properties:
- strategy agent_id:
- top_p type: string
- top_k messages:
type: object items:
$ref: '#/components/schemas/Message'
type: array
stream: stream:
default: false default: false
type: boolean type: boolean
turn_history:
items:
$ref: '#/components/schemas/AgenticSystemTurn'
type: array
required: required:
- message - agent_id
- message_history - messages
- model - turn_history
- sampling_params
- available_tools
- executable_tools
- stream - stream
type: object type: object
AgenticSystemExecuteResponse: AgenticSystemExecuteResponse:
additionalProperties: false additionalProperties: false
properties: properties:
content: turn:
oneOf: $ref: '#/components/schemas/AgenticSystemTurn'
- type: string required:
- $ref: '#/components/schemas/Attachment' - turn
- items: title: non-stream response from the agentic system.
oneOf: type: object
- type: string AgenticSystemExecuteResponseStreamChunk:
- $ref: '#/components/schemas/Attachment' additionalProperties: false
type: array properties:
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason: stop_reason:
enum: enum:
- not_stopped - not_stopped
@ -114,33 +102,159 @@ components:
title: Stop reasons are used to indicate why the model stopped generating title: Stop reasons are used to indicate why the model stopped generating
text. text.
type: string type: string
tool_calls: turn:
$ref: '#/components/schemas/AgenticSystemTurn'
required:
- turn
title: Streamed agent execution response.
type: object
AgenticSystemTurn:
additionalProperties: false
properties:
response_message:
$ref: '#/components/schemas/Message'
steps:
items: items:
additionalProperties: false oneOf:
properties: - additionalProperties: false
arguments: properties:
additionalProperties: logprobs:
oneOf: additionalProperties:
- type: 'null' oneOf:
- type: boolean - type: 'null'
- type: number - type: boolean
- type: string - type: number
- type: array - type: string
- type: object - type: array
type: object - type: object
tool_name: type: object
type: string step_type:
required: default: model_inference
- tool_name enum:
- arguments - model_inference
title: A tool call is a request to a tool. - tool_execution
type: object - safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
text:
type: string
required:
- step_type
- text
type: object
- additionalProperties: false
properties:
step_type:
default: tool_execution
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
tool_calls:
items:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
type: array
tool_responses:
items:
additionalProperties: false
properties:
response:
type: string
tool_name:
type: string
required:
- tool_name
- response
type: object
type: array
required:
- step_type
- tool_calls
- tool_responses
type: object
- additionalProperties: false
properties:
step_type:
default: safety_filtering
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
violation:
additionalProperties: false
properties:
details:
type: string
suggested_user_response:
type: string
violation_type:
type: string
required:
- violation_type
- details
type: object
required:
- step_type
type: object
- additionalProperties: false
properties:
documents:
items:
type: string
type: array
scores:
items:
type: number
type: array
step_type:
default: memory_retrieval
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
required:
- step_type
- documents
- scores
type: object
type: array
user_messages:
items:
$ref: '#/components/schemas/Message'
type: array type: array
required: required:
- content - user_messages
- stop_reason - steps
- tool_calls - response_message
title: Normal chat completion response. title: A single turn in an interaction with an Agentic System.
type: object type: object
Attachment: Attachment:
additionalProperties: false additionalProperties: false
@ -201,7 +315,6 @@ components:
$ref: '#/components/schemas/Message' $ref: '#/components/schemas/Message'
type: array type: array
model: model:
default: llama3_8b_chat
enum: enum:
- llama3_8b_chat - llama3_8b_chat
- llama3_70b_chat - llama3_70b_chat
@ -232,8 +345,8 @@ components:
type: boolean type: boolean
required: required:
- message - message
- message_history
- model - model
- message_history
- sampling_params - sampling_params
- available_tools - available_tools
- max_tokens - max_tokens
@ -297,6 +410,44 @@ components:
- tool_calls - tool_calls
title: Normal chat completion response. title: Normal chat completion response.
type: object type: object
ChatCompletionResponseStreamChunk:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
title: Streamed chat completion response. The actual response is a series of
such objects.
type: object
CompletionRequest: CompletionRequest:
additionalProperties: false additionalProperties: false
properties: properties:
@ -316,7 +467,6 @@ components:
default: 0 default: 0
type: integer type: integer
model: model:
default: llama3_8b
enum: enum:
- llama3_8b - llama3_8b
- llama3_70b - llama3_70b
@ -387,6 +537,33 @@ components:
- content - content
title: Normal completion response. title: Normal completion response.
type: object type: object
CompletionResponseStreamChunk:
additionalProperties: false
properties:
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
required:
- text_delta
title: streamed completion response.
type: object
Message: Message:
additionalProperties: false additionalProperties: false
properties: properties:
@ -447,108 +624,6 @@ components:
- tool_calls - tool_calls
- tool_responses - tool_responses
type: object type: object
StreamedAgenticSystemExecuteResponse:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
- stop_reason
title: Streamed chat completion response.
type: object
StreamedChatCompletionResponse:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
title: Streamed chat completion response.
type: object
StreamedCompletionResponse:
additionalProperties: false
properties:
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
required:
- text_delta
title: streamed completion response.
type: object
URL: URL:
format: uri format: uri
pattern: ^(https?://|file://|data:) pattern: ^(https?://|file://|data:)
@ -560,7 +635,25 @@ info:
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
openapi: 3.1.0 openapi: 3.1.0
paths: paths:
/agentic/system/execute: /agentic_system/create:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/AgenticSystemCreateRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/AgenticSystemCreateResponse'
description: OK
tags:
- AgenticSystem
/agentic_system/execute:
post: post:
parameters: [] parameters: []
requestBody: requestBody:
@ -576,9 +669,9 @@ paths:
schema: schema:
oneOf: oneOf:
- $ref: '#/components/schemas/AgenticSystemExecuteResponse' - $ref: '#/components/schemas/AgenticSystemExecuteResponse'
- $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse' - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion description: non-stream response from the agentic system. **OR** Streamed
response. agent execution response.
tags: tags:
- AgenticSystem - AgenticSystem
/chat_completion: /chat_completion:
@ -597,9 +690,9 @@ paths:
schema: schema:
oneOf: oneOf:
- $ref: '#/components/schemas/ChatCompletionResponse' - $ref: '#/components/schemas/ChatCompletionResponse'
- $ref: '#/components/schemas/StreamedChatCompletionResponse' - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion description: Normal chat completion response. **OR** Streamed chat completion
response. response. The actual response is a series of such objects.
tags: tags:
- Inference - Inference
/completion: /completion:
@ -618,7 +711,7 @@ paths:
schema: schema:
oneOf: oneOf:
- $ref: '#/components/schemas/CompletionResponse' - $ref: '#/components/schemas/CompletionResponse'
- $ref: '#/components/schemas/StreamedCompletionResponse' - $ref: '#/components/schemas/CompletionResponseStreamChunk'
description: Normal completion response. **OR** streamed completion response. description: Normal completion response. **OR** streamed completion response.
tags: tags:
- Inference - Inference
@ -629,9 +722,20 @@ servers:
tags: tags:
- name: Inference - name: Inference
- name: AgenticSystem - name: AgenticSystem
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
/>
name: AgenticSystemCreateResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteRequest"
/> />
name: AgenticSystemExecuteRequest name: AgenticSystemExecuteRequest
- description: 'A single turn in an interaction with an Agentic System.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurn" />'
name: AgenticSystemTurn
- description: 'Attachments are used to refer to external resources, such as images, - description: 'Attachments are used to refer to external resources, such as images,
videos, audio, etc. videos, audio, etc.
@ -642,18 +746,18 @@ tags:
name: Message name: Message
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" /> - description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL name: URL
- description: 'Normal chat completion response. - description: 'non-stream response from the agentic system.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponse" <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponse"
/>' />'
name: AgenticSystemExecuteResponse name: AgenticSystemExecuteResponse
- description: 'Streamed chat completion response. - description: 'Streamed agent execution response.
<SchemaDefinition schemaRef="#/components/schemas/StreamedAgenticSystemExecuteResponse" <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
/>' />'
name: StreamedAgenticSystemExecuteResponse name: AgenticSystemExecuteResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/> />
name: ChatCompletionRequest name: ChatCompletionRequest
@ -662,12 +766,13 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />' <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
name: ChatCompletionResponse name: ChatCompletionResponse
- description: 'Streamed chat completion response. - description: 'Streamed chat completion response. The actual response is a series
of such objects.
<SchemaDefinition schemaRef="#/components/schemas/StreamedChatCompletionResponse" <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
/>' />'
name: StreamedChatCompletionResponse name: ChatCompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/> />
name: CompletionRequest name: CompletionRequest
@ -679,9 +784,9 @@ tags:
- description: 'streamed completion response. - description: 'streamed completion response.
<SchemaDefinition schemaRef="#/components/schemas/StreamedCompletionResponse" <SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
/>' />'
name: StreamedCompletionResponse name: CompletionResponseStreamChunk
x-tagGroups: x-tagGroups:
- name: Operations - name: Operations
tags: tags:
@ -689,15 +794,18 @@ x-tagGroups:
- Inference - Inference
- name: Types - name: Types
tags: tags:
- AgenticSystemCreateRequest
- AgenticSystemCreateResponse
- AgenticSystemExecuteRequest - AgenticSystemExecuteRequest
- AgenticSystemExecuteResponse - AgenticSystemExecuteResponse
- AgenticSystemExecuteResponseStreamChunk
- AgenticSystemTurn
- Attachment - Attachment
- ChatCompletionRequest - ChatCompletionRequest
- ChatCompletionResponse - ChatCompletionResponse
- ChatCompletionResponseStreamChunk
- CompletionRequest - CompletionRequest
- CompletionResponse - CompletionResponse
- CompletionResponseStreamChunk
- Message - Message
- StreamedAgenticSystemExecuteResponse
- StreamedChatCompletionResponse
- StreamedCompletionResponse
- URL - URL

View file

@@ -1,3 +1,3 @@
 #!/bin/bash
-PYTHONPATH=. python3 defn.py
+PYTHONPATH=. python3 api_definitions.py