more work on agent definitions

Ashwin Bharambe 2024-07-09 13:53:09 -07:00
parent 6e4586ba7a
commit 97f9b18aca
8 changed files with 1079 additions and 695 deletions

.gitignore vendored Normal file

@ -0,0 +1 @@
__pycache__

source/agentic_system_types.py Normal file

@ -0,0 +1,88 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Union
from model_types import (
BuiltinTool,
Content,
InstructModel,
Message,
PretrainedModel,
SamplingParams,
StopReason,
ToolCall,
ToolDefinition,
ToolResponse,
)
from strong_typing.schema import json_schema_type
class ExecutionStepType(Enum):
"""The type of execution step."""
model_inference = "model_inference"
tool_execution = "tool_execution"
safety_filtering = "safety_filtering"
memory_retrieval = "memory_retrieval"
@dataclass
class ExecutionStepBase:
"""An agentic system turn can consist of one or more such execution steps."""
step_type: ExecutionStepType
@dataclass
class ModelInferenceStep(ExecutionStepBase):
step_type = ExecutionStepType.model_inference
text: str
logprobs: Optional[Dict[str, Any]] = None
@dataclass
class ToolExecutionStep(ExecutionStepBase):
step_type = ExecutionStepType.tool_execution
# we could be calling multiple tools in a single step (in parallel)
tool_calls: List[ToolCall]
tool_responses: List[ToolResponse]
@dataclass
class SafetyViolation:
violation_type: str
details: str
suggested_user_response: Optional[str] = None
@dataclass
class SafetyFilteringStep(ExecutionStepBase):
step_type = ExecutionStepType.safety_filtering
violation: Optional[SafetyViolation] = None
@dataclass
class MemoryRetrievalStep(ExecutionStepBase):
step_type = ExecutionStepType.memory_retrieval
documents: List[str]
scores: List[float]
ExecutionStep = Union[
ModelInferenceStep,
ToolExecutionStep,
SafetyFilteringStep,
MemoryRetrievalStep,
]
@json_schema_type
@dataclass
class AgenticSystemTurn:
"""A single turn in an interaction with an Agentic System."""
user_messages: List[Message]
steps: List[ExecutionStep]
response_message: Message
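
For orientation, here is a minimal, hypothetical sketch of how these step dataclasses might compose into an AgenticSystemTurn. The step_type arguments are passed explicitly because the inherited field has no default, and the message text, tool arguments, and responses are invented for illustration.

# Hypothetical usage sketch; Role, Message, ToolCall, and ToolResponse come from model_types.
from model_types import Message, Role, ToolCall, ToolResponse

steps = [
    ModelInferenceStep(
        step_type=ExecutionStepType.model_inference,
        text="Let me search for that.",
    ),
    ToolExecutionStep(
        step_type=ExecutionStepType.tool_execution,
        tool_calls=[ToolCall(tool_name="web_search", arguments={"query": "llama 3"})],
        tool_responses=[ToolResponse(tool_name="web_search", response="...")],
    ),
]

turn = AgenticSystemTurn(
    user_messages=[Message(role=Role.user, content="Tell me about Llama 3")],
    steps=steps,
    response_message=Message(role=Role.assistant, content="Llama 3 is a family of open models."),
)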

source/api_definitions.py Normal file

@ -0,0 +1,197 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Set, Union
import yaml
from model_types import (
BuiltinTool,
Content,
InstructModel,
Message,
PretrainedModel,
SamplingParams,
StopReason,
ToolCall,
ToolDefinition,
ToolResponse,
)
from agentic_system_types import (
AgenticSystemTurn,
)
from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type
@json_schema_type
@dataclass
class CompletionRequest:
content: Content
model: PretrainedModel
sampling_params: SamplingParams = SamplingParams()
max_tokens: int = 0
stream: bool = False
logprobs: bool = False
@json_schema_type
@dataclass
class CompletionResponse:
"""Normal completion response."""
content: Content
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class CompletionResponseStreamChunk:
"""streamed completion response."""
text_delta: str
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class ChatCompletionRequest:
message: Message
model: InstructModel
message_history: List[Message] = None
sampling_params: SamplingParams = SamplingParams()
# zero-shot tool definitions as input to the model
available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
default_factory=list
)
max_tokens: int = 0
stream: bool = False
logprobs: bool = False
@json_schema_type
@dataclass
class ChatCompletionResponse:
"""Normal chat completion response."""
content: Content
# note: multiple tool calls can be generated in a single response
tool_calls: List[ToolCall] = field(default_factory=list)
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class ChatCompletionResponseStreamChunk:
"""Streamed chat completion response. The actual response is a series of such objects."""
text_delta: str
stop_reason: Optional[StopReason] = None
tool_call: Optional[ToolCall] = None
class Inference(Protocol):
def post_completion(
self,
request: CompletionRequest,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
def post_chat_completion(
self,
request: ChatCompletionRequest,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
@dataclass
class AgenticSystemCreateRequest:
instructions: str
model: InstructModel
# zero-shot tool definitions as input to the model
available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
default_factory=list
)
executable_tools: Set[str] = field(default_factory=set)
@json_schema_type
@dataclass
class AgenticSystemCreateResponse:
agent_id: str
@json_schema_type
@dataclass
class AgenticSystemExecuteRequest:
agent_id: str
messages: List[Message]
turn_history: List[AgenticSystemTurn] = None
stream: bool = False
@json_schema_type
@dataclass
class AgenticSystemExecuteResponse:
"""non-stream response from the agentic system."""
turn: AgenticSystemTurn
@json_schema_type
@dataclass
class AgenticSystemExecuteResponseStreamChunk:
"""Streamed agent execution response."""
# TODO: make things streamable
turn: AgenticSystemTurn
stop_reason: Optional[StopReason] = None
class AgenticSystem(Protocol):
@webmethod(route="/agentic_system/create")
def create_agentic_system(
self,
request: AgenticSystemCreateRequest,
) -> AgenticSystemCreateResponse: ...
@webmethod(route="/agentic_system/execute")
def create_agentic_system_execute(
self,
request: AgenticSystemExecuteRequest,
) -> Union[
AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk
]: ...
class LlamaStackEndpoints(Inference, AgenticSystem): ...
if __name__ == "__main__":
print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
spec = Specification(
LlamaStackEndpoints,
Options(
server=Server(url="http://llama.meta.com"),
info=Info(
title="Llama Stack specification",
version="0.1",
description="This is the llama stack",
),
),
)
with open("openapi.yaml", "w", encoding="utf-8") as fp:
yaml.dump(spec.get_json(), fp, allow_unicode=True)
with open("openapi.html", "w") as fp:
spec.write_html(fp, pretty_print=True)
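
As a rough illustration of how the Inference protocol might be exercised in-process, here is a hypothetical stub. EchoInference and its canned replies are invented for the example, and Role is imported from model_types since it is not pulled in above.

from model_types import Role  # needed only for this sketch

class EchoInference(Inference):
    """Toy implementation that satisfies the Inference protocol."""

    def post_completion(
        self, request: CompletionRequest
    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
        # Echo the prompt back as the completion.
        return CompletionResponse(
            content=request.content, stop_reason=StopReason.finished_ok
        )

    def post_chat_completion(
        self, request: ChatCompletionRequest
    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
        return ChatCompletionResponse(
            content=f"echo: {request.message.content}",
            stop_reason=StopReason.finished_ok,
        )

reply = EchoInference().post_chat_completion(
    ChatCompletionRequest(
        message=Message(role=Role.user, content="hello"),
        model=InstructModel.llama3_8b_chat,
    )
)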

source/defn.py

@ -1,271 +0,0 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Set, Union
import yaml
from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type
@json_schema_type(
schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
)
@dataclass
class URL:
url: str
def __str__(self) -> str:
return self.url
@json_schema_type
@dataclass
class Attachment:
"""
Attachments are used to refer to external resources, such as images, videos, audio, etc.
"""
url: URL
mime_type: str
Content = Union[
str,
Attachment,
List[Union[str, Attachment]],
]
class Role(Enum):
system = "system"
user = "user"
assistant = "assistant"
tool = "tool"
@dataclass
class ToolCall:
"""
A tool call is a request to a tool.
"""
tool_name: str
arguments: Dict[str, Any]
@dataclass
class ToolResponse:
tool_name: str
response: str
@dataclass
class ToolDefinition:
tool_name: str
parameters: Dict[str, Any]
# TODO: we need to document the parameters for the tool calls
class BuiltinTool(Enum):
"""
Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
"""
web_search = "web_search"
math = "math"
image_gen = "image_gen"
code_interpreter = "code_interpreter"
class StopReason(Enum):
"""
Stop reasons are used to indicate why the model stopped generating text.
"""
not_stopped = "not_stopped"
finished_ok = "finished_ok"
max_tokens = "max_tokens"
@json_schema_type
@dataclass
class Message:
role: Role
# input to the model or output from the model
content: Content
# output from the model
tool_calls: List[ToolCall] = field(default_factory=list)
# input to the model
tool_responses: List[ToolResponse] = field(default_factory=list)
@dataclass
class SamplingParams:
temperature: float = 0.0
strategy: str = "greedy"
top_p: float = 0.95
top_k: int = 0
class PretrainedModel(Enum):
llama3_8b = "llama3_8b"
llama3_70b = "llama3_70b"
class InstructModel(Enum):
llama3_8b_chat = "llama3_8b_chat"
llama3_70b_chat = "llama3_70b_chat"
@json_schema_type
@dataclass
class CompletionRequest:
content: Content
model: PretrainedModel = PretrainedModel.llama3_8b
sampling_params: SamplingParams = SamplingParams()
max_tokens: int = 0
stream: bool = False
logprobs: bool = False
@json_schema_type
@dataclass
class CompletionResponse:
"""Normal completion response."""
content: Content
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedCompletionResponse:
"""streamed completion response."""
text_delta: str
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@dataclass
class ChatCompletionRequestCommon:
message: Message
message_history: List[Message] = None
model: InstructModel = InstructModel.llama3_8b_chat
sampling_params: SamplingParams = SamplingParams()
# zero-shot tool definitions as input to the model
available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
default_factory=list
)
@json_schema_type
@dataclass
class ChatCompletionRequest(ChatCompletionRequestCommon):
max_tokens: int = 0
stream: bool = False
logprobs: bool = False
@json_schema_type
@dataclass
class ChatCompletionResponse:
"""Normal chat completion response."""
content: Content
# note: multiple tool calls can be generated in a single response
tool_calls: List[ToolCall] = field(default_factory=list)
stop_reason: Optional[StopReason] = None
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedChatCompletionResponse:
"""Streamed chat completion response."""
text_delta: str
stop_reason: Optional[StopReason] = None
tool_call: Optional[ToolCall] = None
class Inference(Protocol):
def post_completion(
self,
request: CompletionRequest,
) -> Union[CompletionResponse, StreamedCompletionResponse]: ...
def post_chat_completion(
self,
request: ChatCompletionRequest,
) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
@json_schema_type
@dataclass
class AgenticSystemExecuteRequest(ChatCompletionRequestCommon):
executable_tools: Set[str] = field(default_factory=set)
stream: bool = False
@json_schema_type
@dataclass
class AgenticSystemExecuteResponse:
"""Normal chat completion response."""
content: Content
stop_reason: StopReason
tool_calls: List[ToolCall] = field(default_factory=list)
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedAgenticSystemExecuteResponse:
"""Streamed chat completion response."""
text_delta: str
stop_reason: StopReason
tool_call: Optional[ToolCall] = None
class AgenticSystem(Protocol):
@webmethod(route="/agentic/system/execute")
def create_agentic_system_execute(
self,
request: AgenticSystemExecuteRequest,
) -> Union[AgenticSystemExecuteResponse, StreamedAgenticSystemExecuteResponse]: ...
class Endpoint(Inference, AgenticSystem): ...
if __name__ == "__main__":
print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
spec = Specification(
Endpoint,
Options(
server=Server(url="http://llama.meta.com"),
info=Info(
title="Llama Stack specification",
version="0.1",
description="This is the llama stack",
),
),
)
with open("openapi.yaml", "w", encoding="utf-8") as fp:
yaml.dump(spec.get_json(), fp, allow_unicode=True)
with open("openapi.html", "w") as fp:
spec.write_html(fp, pretty_print=True)

source/model_types.py Normal file

@ -0,0 +1,122 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Union
from strong_typing.schema import json_schema_type
@json_schema_type(
schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
)
@dataclass
class URL:
url: str
def __str__(self) -> str:
return self.url
@json_schema_type
@dataclass
class Attachment:
"""
Attachments are used to refer to external resources, such as images, videos, audio, etc.
"""
url: URL
mime_type: str
Content = Union[
str,
Attachment,
List[Union[str, Attachment]],
]
class Role(Enum):
system = "system"
user = "user"
assistant = "assistant"
tool = "tool"
@dataclass
class ToolCall:
"""
A tool call is a request to a tool.
"""
tool_name: str
arguments: Dict[str, Any]
@dataclass
class ToolResponse:
tool_name: str
response: str
@dataclass
class ToolDefinition:
tool_name: str
parameters: Dict[str, Any]
# TODO: we need to document the parameters for the tool calls
class BuiltinTool(Enum):
"""
Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
"""
web_search = "web_search"
math = "math"
image_gen = "image_gen"
code_interpreter = "code_interpreter"
class StopReason(Enum):
"""
Stop reasons are used to indicate why the model stopped generating text.
"""
not_stopped = "not_stopped"
finished_ok = "finished_ok"
max_tokens = "max_tokens"
@json_schema_type
@dataclass
class Message:
role: Role
# input to the model or output from the model
content: Content
# output from the model
tool_calls: List[ToolCall] = field(default_factory=list)
# input to the model
tool_responses: List[ToolResponse] = field(default_factory=list)
@dataclass
class SamplingParams:
temperature: float = 0.0
strategy: str = "greedy"
top_p: float = 0.95
top_k: int = 0
class PretrainedModel(Enum):
llama3_8b = "llama3_8b"
llama3_70b = "llama3_70b"
class InstructModel(Enum):
llama3_8b_chat = "llama3_8b_chat"
llama3_70b_chat = "llama3_70b_chat"
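
A short, hypothetical sketch of how these primitives compose; the query, URL, and response strings are invented.

image = Attachment(url=URL("https://example.com/cat.png"), mime_type="image/png")
user_turn = Message(role=Role.user, content=["What is in this image?", image])

call = ToolCall(tool_name=BuiltinTool.web_search.value, arguments={"query": "llama 3"})
assistant_turn = Message(
    role=Role.assistant,
    content="Let me look that up.",
    tool_calls=[call],
)
tool_turn = Message(
    role=Role.tool,
    content="",
    tool_responses=[ToolResponse(tool_name=call.tool_name, response="Llama 3 is ...")],
)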


@ -29,11 +29,41 @@
}
],
"paths": {
"/agentic/system/execute": {
"/agentic_system/create": {
"post": {
"responses": {
"200": {
"description": "Normal chat completion response. **OR** Streamed chat completion response.",
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AgenticSystemCreateResponse"
}
}
}
}
},
"tags": [
"AgenticSystem"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AgenticSystemCreateRequest"
}
}
},
"required": true
}
}
},
"/agentic_system/execute": {
"post": {
"responses": {
"200": {
"description": "non-stream response from the agentic system. **OR** Streamed agent execution response.",
"content": {
"application/json": {
"schema": {
@ -42,7 +72,7 @@
"$ref": "#/components/schemas/AgenticSystemExecuteResponse"
},
{
"$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse"
"$ref": "#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
}
]
}
@ -70,7 +100,7 @@
"post": {
"responses": {
"200": {
"description": "Normal chat completion response. **OR** Streamed chat completion response.",
"description": "Normal chat completion response. **OR** Streamed chat completion response. The actual response is a series of such objects.",
"content": {
"application/json": {
"schema": {
@ -79,7 +109,7 @@
"$ref": "#/components/schemas/ChatCompletionResponse"
},
{
"$ref": "#/components/schemas/StreamedChatCompletionResponse"
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
}
]
}
@ -116,7 +146,7 @@
"$ref": "#/components/schemas/CompletionResponse"
},
{
"$ref": "#/components/schemas/StreamedCompletionResponse"
"$ref": "#/components/schemas/CompletionResponseStreamChunk"
}
]
}
@ -144,52 +174,17 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
"AgenticSystemExecuteRequest": {
"AgenticSystemCreateRequest": {
"type": "object",
"properties": {
"message": {
"$ref": "#/components/schemas/Message"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
"instructions": {
"type": "string"
},
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
],
"default": "llama3_8b_chat"
},
"sampling_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number",
"default": 0.0
},
"strategy": {
"type": "string",
"default": "greedy"
},
"top_p": {
"type": "number",
"default": 0.95
},
"top_k": {
"type": "integer",
"default": 0
}
},
"additionalProperties": false,
"required": [
"temperature",
"strategy",
"top_p",
"top_k"
]
},
"available_tools": {
@ -253,6 +248,45 @@
"type": "string"
},
"uniqueItems": true
}
},
"additionalProperties": false,
"required": [
"instructions",
"model",
"available_tools",
"executable_tools"
]
},
"AgenticSystemCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
]
},
"AgenticSystemExecuteRequest": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"turn_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgenticSystemTurn"
}
},
"stream": {
"type": "boolean",
@ -261,15 +295,245 @@
},
"additionalProperties": false,
"required": [
"message",
"message_history",
"model",
"sampling_params",
"available_tools",
"executable_tools",
"agent_id",
"messages",
"turn_history",
"stream"
]
},
"AgenticSystemTurn": {
"type": "object",
"properties": {
"user_messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"steps": {
"type": "array",
"items": {
"oneOf": [
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "model_inference"
},
"text": {
"type": "string"
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"text"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "tool_execution"
},
"tool_calls": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"tool_responses": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"response": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"tool_name",
"response"
]
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"tool_calls",
"tool_responses"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "safety_filtering"
},
"violation": {
"type": "object",
"properties": {
"violation_type": {
"type": "string"
},
"details": {
"type": "string"
},
"suggested_user_response": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"violation_type",
"details"
]
}
},
"additionalProperties": false,
"required": [
"step_type"
]
},
{
"type": "object",
"properties": {
"step_type": {
"type": "string",
"enum": [
"model_inference",
"tool_execution",
"safety_filtering",
"memory_retrieval"
],
"title": "The type of execution step.",
"default": "memory_retrieval"
},
"documents": {
"type": "array",
"items": {
"type": "string"
}
},
"scores": {
"type": "array",
"items": {
"type": "number"
}
}
},
"additionalProperties": false,
"required": [
"step_type",
"documents",
"scores"
]
}
]
}
},
"response_message": {
"$ref": "#/components/schemas/Message"
}
},
"additionalProperties": false,
"required": [
"user_messages",
"steps",
"response_message"
],
"title": "A single turn in an interaction with an Agentic System."
},
"Attachment": {
"type": "object",
"properties": {
@ -400,28 +664,21 @@
"AgenticSystemExecuteResponse": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
"turn": {
"$ref": "#/components/schemas/AgenticSystemTurn"
}
]
}
}
]
},
"additionalProperties": false,
"required": [
"turn"
],
"title": "non-stream response from the agentic system."
},
"AgenticSystemExecuteResponseStreamChunk": {
"type": "object",
"properties": {
"turn": {
"$ref": "#/components/schemas/AgenticSystemTurn"
},
"stop_reason": {
"type": "string",
@ -431,144 +688,13 @@
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"tool_calls": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
"turn"
],
"title": "A tool call is a request to a tool."
}
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason",
"tool_calls"
],
"title": "Normal chat completion response."
},
"StreamedAgenticSystemExecuteResponse": {
"type": "object",
"properties": {
"text_delta": {
"type": "string"
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"tool_call": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"additionalProperties": false,
"required": [
"text_delta",
"stop_reason"
],
"title": "Streamed chat completion response."
"title": "Streamed agent execution response."
},
"ChatCompletionRequest": {
"type": "object",
@ -576,19 +702,18 @@
"message": {
"$ref": "#/components/schemas/Message"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
],
"default": "llama3_8b_chat"
]
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"sampling_params": {
"type": "object",
@ -689,8 +814,8 @@
"additionalProperties": false,
"required": [
"message",
"message_history",
"model",
"message_history",
"sampling_params",
"available_tools",
"max_tokens",
@ -808,7 +933,7 @@
],
"title": "Normal chat completion response."
},
"StreamedChatCompletionResponse": {
"ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
"text_delta": {
@ -867,7 +992,7 @@
"required": [
"text_delta"
],
"title": "Streamed chat completion response."
"title": "Streamed chat completion response. The actual response is a series of such objects."
},
"CompletionRequest": {
"type": "object",
@ -900,8 +1025,7 @@
"enum": [
"llama3_8b",
"llama3_70b"
],
"default": "llama3_8b"
]
},
"sampling_params": {
"type": "object",
@ -1021,7 +1145,7 @@
],
"title": "Normal completion response."
},
"StreamedCompletionResponse": {
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"text_delta": {
@ -1083,10 +1207,22 @@
{
"name": "AgenticSystem"
},
{
"name": "AgenticSystemCreateRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateRequest\" />"
},
{
"name": "AgenticSystemCreateResponse",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateResponse\" />"
},
{
"name": "AgenticSystemExecuteRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteRequest\" />"
},
{
"name": "AgenticSystemTurn",
"description": "A single turn in an interaction with an Agentic System.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemTurn\" />"
},
{
"name": "Attachment",
"description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
@ -1101,11 +1237,11 @@
},
{
"name": "AgenticSystemExecuteResponse",
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponse\" />"
"description": "non-stream response from the agentic system.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponse\" />"
},
{
"name": "StreamedAgenticSystemExecuteResponse",
"description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedAgenticSystemExecuteResponse\" />"
"name": "AgenticSystemExecuteResponseStreamChunk",
"description": "Streamed agent execution response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponseStreamChunk\" />"
},
{
"name": "ChatCompletionRequest",
@ -1116,8 +1252,8 @@
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
},
{
"name": "StreamedChatCompletionResponse",
"description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedChatCompletionResponse\" />"
"name": "ChatCompletionResponseStreamChunk",
"description": "Streamed chat completion response. The actual response is a series of such objects.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
},
{
"name": "CompletionRequest",
@ -1128,8 +1264,8 @@
"description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
},
{
"name": "StreamedCompletionResponse",
"description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedCompletionResponse\" />"
"name": "CompletionResponseStreamChunk",
"description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
}
],
"x-tagGroups": [
@ -1143,17 +1279,20 @@
{
"name": "Types",
"tags": [
"AgenticSystemCreateRequest",
"AgenticSystemCreateResponse",
"AgenticSystemExecuteRequest",
"AgenticSystemExecuteResponse",
"AgenticSystemExecuteResponseStreamChunk",
"AgenticSystemTurn",
"Attachment",
"ChatCompletionRequest",
"ChatCompletionResponse",
"ChatCompletionResponseStreamChunk",
"CompletionRequest",
"CompletionResponse",
"CompletionResponseStreamChunk",
"Message",
"StreamedAgenticSystemExecuteResponse",
"StreamedChatCompletionResponse",
"StreamedCompletionResponse",
"URL"
]
}
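
For orientation, a hypothetical client call against the /agentic_system/create route defined above. The host is the placeholder server URL from the spec, and the payload values (and the choice of urllib) are invented; the body shape follows AgenticSystemCreateRequest.

import json
import urllib.request

payload = {
    "instructions": "You are a helpful shopping assistant.",
    "model": "llama3_8b_chat",
    "available_tools": ["web_search"],
    "executable_tools": [],
}
req = urllib.request.Request(
    "http://llama.meta.com/agentic_system/create",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    agent_id = json.loads(resp.read())["agent_id"]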


@ -1,7 +1,7 @@
components:
responses: {}
schemas:
AgenticSystemExecuteRequest:
AgenticSystemCreateRequest:
additionalProperties: false
properties:
available_tools:
@ -39,63 +39,85 @@ components:
type: string
type: array
uniqueItems: true
message:
$ref: '#/components/schemas/Message'
message_history:
items:
$ref: '#/components/schemas/Message'
type: array
instructions:
type: string
model:
default: llama3_8b_chat
enum:
- llama3_8b_chat
- llama3_70b_chat
type: string
sampling_params:
required:
- instructions
- model
- available_tools
- executable_tools
type: object
AgenticSystemCreateResponse:
additionalProperties: false
properties:
strategy:
default: greedy
agent_id:
type: string
temperature:
default: 0.0
type: number
top_k:
default: 0
type: integer
top_p:
default: 0.95
type: number
required:
- temperature
- strategy
- top_p
- top_k
- agent_id
type: object
AgenticSystemExecuteRequest:
additionalProperties: false
properties:
agent_id:
type: string
messages:
items:
$ref: '#/components/schemas/Message'
type: array
stream:
default: false
type: boolean
turn_history:
items:
$ref: '#/components/schemas/AgenticSystemTurn'
type: array
required:
- message
- message_history
- model
- sampling_params
- available_tools
- executable_tools
- agent_id
- messages
- turn_history
- stream
type: object
AgenticSystemExecuteResponse:
additionalProperties: false
properties:
content:
turn:
$ref: '#/components/schemas/AgenticSystemTurn'
required:
- turn
title: non-stream response from the agentic system.
type: object
AgenticSystemExecuteResponseStreamChunk:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
turn:
$ref: '#/components/schemas/AgenticSystemTurn'
required:
- turn
title: Streamed agent execution response.
type: object
AgenticSystemTurn:
additionalProperties: false
properties:
response_message:
$ref: '#/components/schemas/Message'
steps:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
- additionalProperties: false
properties:
logprobs:
additionalProperties:
oneOf:
@ -106,13 +128,31 @@ components:
- type: array
- type: object
type: object
stop_reason:
step_type:
default: model_inference
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
text:
type: string
required:
- step_type
- text
type: object
- additionalProperties: false
properties:
step_type:
default: tool_execution
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
tool_calls:
items:
@ -136,11 +176,85 @@ components:
title: A tool call is a request to a tool.
type: object
type: array
tool_responses:
items:
additionalProperties: false
properties:
response:
type: string
tool_name:
type: string
required:
- content
- stop_reason
- tool_name
- response
type: object
type: array
required:
- step_type
- tool_calls
title: Normal chat completion response.
- tool_responses
type: object
- additionalProperties: false
properties:
step_type:
default: safety_filtering
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
violation:
additionalProperties: false
properties:
details:
type: string
suggested_user_response:
type: string
violation_type:
type: string
required:
- violation_type
- details
type: object
required:
- step_type
type: object
- additionalProperties: false
properties:
documents:
items:
type: string
type: array
scores:
items:
type: number
type: array
step_type:
default: memory_retrieval
enum:
- model_inference
- tool_execution
- safety_filtering
- memory_retrieval
title: The type of execution step.
type: string
required:
- step_type
- documents
- scores
type: object
type: array
user_messages:
items:
$ref: '#/components/schemas/Message'
type: array
required:
- user_messages
- steps
- response_message
title: A single turn in an interaction with an Agentic System.
type: object
Attachment:
additionalProperties: false
@ -201,7 +315,6 @@ components:
$ref: '#/components/schemas/Message'
type: array
model:
default: llama3_8b_chat
enum:
- llama3_8b_chat
- llama3_70b_chat
@ -232,8 +345,8 @@ components:
type: boolean
required:
- message
- message_history
- model
- message_history
- sampling_params
- available_tools
- max_tokens
@ -297,6 +410,44 @@ components:
- tool_calls
title: Normal chat completion response.
type: object
ChatCompletionResponseStreamChunk:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
title: Streamed chat completion response. The actual response is a series of
such objects.
type: object
CompletionRequest:
additionalProperties: false
properties:
@ -316,7 +467,6 @@ components:
default: 0
type: integer
model:
default: llama3_8b
enum:
- llama3_8b
- llama3_70b
@ -387,6 +537,33 @@ components:
- content
title: Normal completion response.
type: object
CompletionResponseStreamChunk:
additionalProperties: false
properties:
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
required:
- text_delta
title: streamed completion response.
type: object
Message:
additionalProperties: false
properties:
@ -447,108 +624,6 @@ components:
- tool_calls
- tool_responses
type: object
StreamedAgenticSystemExecuteResponse:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
- stop_reason
title: Streamed chat completion response.
type: object
StreamedChatCompletionResponse:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
title: Streamed chat completion response.
type: object
StreamedCompletionResponse:
additionalProperties: false
properties:
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
required:
- text_delta
title: streamed completion response.
type: object
URL:
format: uri
pattern: ^(https?://|file://|data:)
@ -560,7 +635,25 @@ info:
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
openapi: 3.1.0
paths:
/agentic/system/execute:
/agentic_system/create:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/AgenticSystemCreateRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/AgenticSystemCreateResponse'
description: OK
tags:
- AgenticSystem
/agentic_system/execute:
post:
parameters: []
requestBody:
@ -576,9 +669,9 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/AgenticSystemExecuteResponse'
- $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse'
description: Normal chat completion response. **OR** Streamed chat completion
response.
- $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk'
description: non-stream response from the agentic system. **OR** Streamed
agent execution response.
tags:
- AgenticSystem
/chat_completion:
@ -597,9 +690,9 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/ChatCompletionResponse'
- $ref: '#/components/schemas/StreamedChatCompletionResponse'
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion
response.
response. The actual response is a series of such objects.
tags:
- Inference
/completion:
@ -618,7 +711,7 @@ paths:
schema:
oneOf:
- $ref: '#/components/schemas/CompletionResponse'
- $ref: '#/components/schemas/StreamedCompletionResponse'
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
description: Normal completion response. **OR** streamed completion response.
tags:
- Inference
@ -629,9 +722,20 @@ servers:
tags:
- name: Inference
- name: AgenticSystem
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
/>
name: AgenticSystemCreateResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteRequest"
/>
name: AgenticSystemExecuteRequest
- description: 'A single turn in an interaction with an Agentic System.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurn" />'
name: AgenticSystemTurn
- description: 'Attachments are used to refer to external resources, such as images,
videos, audio, etc.
@ -642,18 +746,18 @@ tags:
name: Message
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: 'Normal chat completion response.
- description: 'non-stream response from the agentic system.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponse"
/>'
name: AgenticSystemExecuteResponse
- description: 'Streamed chat completion response.
- description: 'Streamed agent execution response.
<SchemaDefinition schemaRef="#/components/schemas/StreamedAgenticSystemExecuteResponse"
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
/>'
name: StreamedAgenticSystemExecuteResponse
name: AgenticSystemExecuteResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
@ -662,12 +766,13 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
name: ChatCompletionResponse
- description: 'Streamed chat completion response.
- description: 'Streamed chat completion response. The actual response is a series
of such objects.
<SchemaDefinition schemaRef="#/components/schemas/StreamedChatCompletionResponse"
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
/>'
name: StreamedChatCompletionResponse
name: ChatCompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/>
name: CompletionRequest
@ -679,9 +784,9 @@ tags:
- description: 'streamed completion response.
<SchemaDefinition schemaRef="#/components/schemas/StreamedCompletionResponse"
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
/>'
name: StreamedCompletionResponse
name: CompletionResponseStreamChunk
x-tagGroups:
- name: Operations
tags:
@ -689,15 +794,18 @@ x-tagGroups:
- Inference
- name: Types
tags:
- AgenticSystemCreateRequest
- AgenticSystemCreateResponse
- AgenticSystemExecuteRequest
- AgenticSystemExecuteResponse
- AgenticSystemExecuteResponseStreamChunk
- AgenticSystemTurn
- Attachment
- ChatCompletionRequest
- ChatCompletionResponse
- ChatCompletionResponseStreamChunk
- CompletionRequest
- CompletionResponse
- CompletionResponseStreamChunk
- Message
- StreamedAgenticSystemExecuteResponse
- StreamedChatCompletionResponse
- StreamedCompletionResponse
- URL


@ -1,3 +1,3 @@
#!/bin/bash
PYTHONPATH=. python3 defn.py
PYTHONPATH=. python3 api_definitions.py