diff --git a/source/defn.py b/source/defn.py index 48289fd0e..619a95e7f 100644 --- a/source/defn.py +++ b/source/defn.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum -from typing import Any, Dict, List, Optional, Protocol, Union +from typing import Any, Dict, List, Optional, Protocol, Set, Union import yaml @@ -45,16 +45,6 @@ class Role(Enum): tool = "tool" -class StopReason(Enum): - """ - Stop reasons are used to indicate why the model stopped generating text. - """ - - not_stopped = "not_stopped" - finished_ok = "finished_ok" - max_tokens = "max_tokens" - - @dataclass class ToolCall: """ @@ -77,6 +67,28 @@ class ToolDefinition: parameters: Dict[str, Any] +# TODO: we need to document the parameters for the tool calls +class BuiltinTool(Enum): + """ + Builtin tools are tools the model is natively aware of and was potentially fine-tuned with. + """ + + web_search = "web_search" + math = "math" + image_gen = "image_gen" + code_interpreter = "code_interpreter" + + +class StopReason(Enum): + """ + Stop reasons are used to indicate why the model stopped generating text. + """ + + not_stopped = "not_stopped" + finished_ok = "finished_ok" + max_tokens = "max_tokens" + + @json_schema_type @dataclass class Message: @@ -85,9 +97,6 @@ class Message: # input to the model or output from the model content: Content - # zero-shot tool definitions as input to the model - tool_definitions: List[ToolDefinition] = field(default_factory=list) - # output from the model tool_calls: List[ToolCall] = field(default_factory=list) @@ -95,45 +104,6 @@ class Message: tool_responses: List[ToolResponse] = field(default_factory=list) -@json_schema_type -@dataclass -class CompletionResponse: - """Normal completion response.""" - content: Content - stop_reason: StopReason - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class StreamedCompletionResponse: - """streamed completion response.""" - text_delta: str - stop_reason: StopReason - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class ChatCompletionResponse: - """Normal chat completion response.""" - - content: Content - stop_reason: StopReason - tool_calls: List[ToolCall] = field(default_factory=list) - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class StreamedChatCompletionResponse: - """Streamed chat completion response.""" - - text_delta: str - stop_reason: StopReason - tool_call: Optional[ToolCall] = None - - @dataclass class SamplingParams: temperature: float = 0.0 @@ -165,16 +135,69 @@ class CompletionRequest: @json_schema_type @dataclass -class ChatCompletionRequest: +class CompletionResponse: + """Normal completion response.""" + + content: Content + stop_reason: Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class StreamedCompletionResponse: + """streamed completion response.""" + + text_delta: str + stop_reason: Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@dataclass +class ChatCompletionRequestCommon: message: Message message_history: List[Message] = None model: InstructModel = InstructModel.llama3_8b_chat sampling_params: SamplingParams = SamplingParams() + + # zero-shot tool definitions as input to the model + available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( + default_factory=list + ) + + +@json_schema_type +@dataclass +class ChatCompletionRequest(ChatCompletionRequestCommon): max_tokens: int = 0 stream: bool = False logprobs: bool = False +@json_schema_type +@dataclass +class ChatCompletionResponse: + """Normal chat completion response.""" + + content: Content + + # note: multiple tool calls can be generated in a single response + tool_calls: List[ToolCall] = field(default_factory=list) + + stop_reason: Optional[StopReason] = None + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class StreamedChatCompletionResponse: + """Streamed chat completion response.""" + + text_delta: str + stop_reason: Optional[StopReason] = None + tool_call: Optional[ToolCall] = None + + class Inference(Protocol): def post_completion( @@ -188,19 +211,41 @@ class Inference(Protocol): ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ... +@json_schema_type +@dataclass +class AgenticSystemExecuteRequest(ChatCompletionRequestCommon): + executable_tools: Set[str] = field(default_factory=set) + stream: bool = False + @json_schema_type @dataclass -class AgenticSystemExecuteRequest: - message: Message - message_history: List[Message] = None - model: InstructModel = InstructModel.llama3_8b_chat - sampling_params: SamplingParams = SamplingParams() +class AgenticSystemExecuteResponse: + """Normal chat completion response.""" + + content: Content + stop_reason: StopReason + tool_calls: List[ToolCall] = field(default_factory=list) + logprobs: Optional[Dict[str, Any]] = None + + +@json_schema_type +@dataclass +class StreamedAgenticSystemExecuteResponse: + """Streamed chat completion response.""" + + text_delta: str + stop_reason: StopReason + tool_call: Optional[ToolCall] = None + class AgenticSystem(Protocol): @webmethod(route="/agentic/system/execute") - def create_agentic_system_execute(self,) -> str: ... + def create_agentic_system_execute( + self, + request: AgenticSystemExecuteRequest, + ) -> Union[AgenticSystemExecuteResponse, StreamedAgenticSystemExecuteResponse]: ... class Endpoint(Inference, AgenticSystem): ... diff --git a/source/openapi.html b/source/openapi.html index c4290cb7f..328d73edc 100644 --- a/source/openapi.html +++ b/source/openapi.html @@ -30,14 +30,21 @@ ], "paths": { "/agentic/system/execute": { - "get": { + "post": { "responses": { "200": { - "description": "OK", + "description": "Normal chat completion response. **OR** Streamed chat completion response.", "content": { "application/json": { "schema": { - "type": "string" + "oneOf": [ + { + "$ref": "#/components/schemas/AgenticSystemExecuteResponse" + }, + { + "$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse" + } + ] } } } @@ -46,7 +53,17 @@ "tags": [ "AgenticSystem" ], - "parameters": [] + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgenticSystemExecuteRequest" + } + } + }, + "required": true + } } }, "/chat_completion": { @@ -127,24 +144,7 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { - "Attachment": { - "type": "object", - "properties": { - "url": { - "$ref": "#/components/schemas/URL" - }, - "mime_type": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "url", - "mime_type" - ], - "title": "Attachments are used to refer to external resources, such as images, videos, audio, etc." - }, - "ChatCompletionRequest": { + "AgenticSystemExecuteRequest": { "type": "object", "properties": { "message": { @@ -192,17 +192,71 @@ "top_k" ] }, - "max_tokens": { - "type": "integer", - "default": 0 + "available_tools": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string", + "enum": [ + "web_search", + "math", + "image_gen", + "code_interpreter" + ], + "title": "Builtin tools are tools the model is natively aware of and was potentially fine-tuned with." + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "parameters" + ] + } + ] + } + }, + "executable_tools": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true }, "stream": { "type": "boolean", "default": false - }, - "logprobs": { - "type": "boolean", - "default": false } }, "additionalProperties": false, @@ -211,11 +265,28 @@ "message_history", "model", "sampling_params", - "max_tokens", - "stream", - "logprobs" + "available_tools", + "executable_tools", + "stream" ] }, + "Attachment": { + "type": "object", + "properties": { + "url": { + "$ref": "#/components/schemas/URL" + }, + "mime_type": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "url", + "mime_type" + ], + "title": "Attachments are used to refer to external resources, such as images, videos, audio, etc." + }, "Message": { "type": "object", "properties": { @@ -251,47 +322,6 @@ } ] }, - "tool_definitions": { - "type": "array", - "items": { - "type": "object", - "properties": { - "tool_name": { - "type": "string" - }, - "parameters": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "parameters" - ] - } - }, "tool_calls": { "type": "array", "items": { @@ -358,7 +388,6 @@ "required": [ "role", "content", - "tool_definitions", "tool_calls", "tool_responses" ] @@ -368,7 +397,7 @@ "format": "uri", "pattern": "^(https?://|file://|data:)" }, - "ChatCompletionResponse": { + "AgenticSystemExecuteResponse": { "type": "object", "properties": { "content": { @@ -479,7 +508,7 @@ ], "title": "Normal chat completion response." }, - "StreamedChatCompletionResponse": { + "StreamedAgenticSystemExecuteResponse": { "type": "object", "properties": { "text_delta": { @@ -541,6 +570,305 @@ ], "title": "Streamed chat completion response." }, + "ChatCompletionRequest": { + "type": "object", + "properties": { + "message": { + "$ref": "#/components/schemas/Message" + }, + "message_history": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "model": { + "type": "string", + "enum": [ + "llama3_8b_chat", + "llama3_70b_chat" + ], + "default": "llama3_8b_chat" + }, + "sampling_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number", + "default": 0.0 + }, + "strategy": { + "type": "string", + "default": "greedy" + }, + "top_p": { + "type": "number", + "default": 0.95 + }, + "top_k": { + "type": "integer", + "default": 0 + } + }, + "additionalProperties": false, + "required": [ + "temperature", + "strategy", + "top_p", + "top_k" + ] + }, + "available_tools": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string", + "enum": [ + "web_search", + "math", + "image_gen", + "code_interpreter" + ], + "title": "Builtin tools are tools the model is natively aware of and was potentially fine-tuned with." + }, + { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "parameters" + ] + } + ] + } + }, + "max_tokens": { + "type": "integer", + "default": 0 + }, + "stream": { + "type": "boolean", + "default": false + }, + "logprobs": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "message", + "message_history", + "model", + "sampling_params", + "available_tools", + "max_tokens", + "stream", + "logprobs" + ] + }, + "ChatCompletionResponse": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + } + ] + } + } + ] + }, + "tool_calls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "arguments" + ], + "title": "A tool call is a request to a tool." + } + }, + "stop_reason": { + "type": "string", + "enum": [ + "not_stopped", + "finished_ok", + "max_tokens" + ], + "title": "Stop reasons are used to indicate why the model stopped generating text." + }, + "logprobs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content", + "tool_calls" + ], + "title": "Normal chat completion response." + }, + "StreamedChatCompletionResponse": { + "type": "object", + "properties": { + "text_delta": { + "type": "string" + }, + "stop_reason": { + "type": "string", + "enum": [ + "not_stopped", + "finished_ok", + "max_tokens" + ], + "title": "Stop reasons are used to indicate why the model stopped generating text." + }, + "tool_call": { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "arguments" + ], + "title": "A tool call is a request to a tool." + } + }, + "additionalProperties": false, + "required": [ + "text_delta" + ], + "title": "Streamed chat completion response." + }, "CompletionRequest": { "type": "object", "properties": { @@ -689,8 +1017,7 @@ }, "additionalProperties": false, "required": [ - "content", - "stop_reason" + "content" ], "title": "Normal completion response." }, @@ -737,8 +1064,7 @@ }, "additionalProperties": false, "required": [ - "text_delta", - "stop_reason" + "text_delta" ], "title": "streamed completion response." } @@ -751,20 +1077,20 @@ } ], "tags": [ + { + "name": "Inference" + }, { "name": "AgenticSystem" }, { - "name": "Inference" + "name": "AgenticSystemExecuteRequest", + "description": "" }, { "name": "Attachment", "description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n" }, - { - "name": "ChatCompletionRequest", - "description": "" - }, { "name": "Message", "description": "" @@ -773,6 +1099,18 @@ "name": "URL", "description": "" }, + { + "name": "AgenticSystemExecuteResponse", + "description": "Normal chat completion response.\n\n" + }, + { + "name": "StreamedAgenticSystemExecuteResponse", + "description": "Streamed chat completion response.\n\n" + }, + { + "name": "ChatCompletionRequest", + "description": "" + }, { "name": "ChatCompletionResponse", "description": "Normal chat completion response.\n\n" @@ -805,12 +1143,15 @@ { "name": "Types", "tags": [ + "AgenticSystemExecuteRequest", + "AgenticSystemExecuteResponse", "Attachment", "ChatCompletionRequest", "ChatCompletionResponse", "CompletionRequest", "CompletionResponse", "Message", + "StreamedAgenticSystemExecuteResponse", "StreamedChatCompletionResponse", "StreamedCompletionResponse", "URL" diff --git a/source/openapi.yaml b/source/openapi.yaml index 4ec2c1e31..d0b4fc170 100644 --- a/source/openapi.yaml +++ b/source/openapi.yaml @@ -1,6 +1,147 @@ components: responses: {} schemas: + AgenticSystemExecuteRequest: + additionalProperties: false + properties: + available_tools: + items: + oneOf: + - enum: + - web_search + - math + - image_gen + - code_interpreter + title: Builtin tools are tools the model is natively aware of and was + potentially fine-tuned with. + type: string + - additionalProperties: false + properties: + parameters: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - parameters + type: object + type: array + executable_tools: + items: + type: string + type: array + uniqueItems: true + message: + $ref: '#/components/schemas/Message' + message_history: + items: + $ref: '#/components/schemas/Message' + type: array + model: + default: llama3_8b_chat + enum: + - llama3_8b_chat + - llama3_70b_chat + type: string + sampling_params: + additionalProperties: false + properties: + strategy: + default: greedy + type: string + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number + required: + - temperature + - strategy + - top_p + - top_k + type: object + stream: + default: false + type: boolean + required: + - message + - message_history + - model + - sampling_params + - available_tools + - executable_tools + - stream + type: object + AgenticSystemExecuteResponse: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. + type: string + tool_calls: + items: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + type: array + required: + - content + - stop_reason + - tool_calls + title: Normal chat completion response. + type: object Attachment: additionalProperties: false properties: @@ -17,6 +158,36 @@ components: ChatCompletionRequest: additionalProperties: false properties: + available_tools: + items: + oneOf: + - enum: + - web_search + - math + - image_gen + - code_interpreter + title: Builtin tools are tools the model is natively aware of and was + potentially fine-tuned with. + type: string + - additionalProperties: false + properties: + parameters: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - parameters + type: object + type: array logprobs: default: false type: boolean @@ -64,6 +235,7 @@ components: - message_history - model - sampling_params + - available_tools - max_tokens - stream - logprobs @@ -122,7 +294,6 @@ components: type: array required: - content - - stop_reason - tool_calls title: Normal chat completion response. type: object @@ -214,7 +385,6 @@ components: type: string required: - content - - stop_reason title: Normal completion response. type: object Message: @@ -258,27 +428,6 @@ components: title: A tool call is a request to a tool. type: object type: array - tool_definitions: - items: - additionalProperties: false - properties: - parameters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - parameters - type: object - type: array tool_responses: items: additionalProperties: false @@ -295,11 +444,10 @@ components: required: - role - content - - tool_definitions - tool_calls - tool_responses type: object - StreamedChatCompletionResponse: + StreamedAgenticSystemExecuteResponse: additionalProperties: false properties: stop_reason: @@ -337,6 +485,43 @@ components: - stop_reason title: Streamed chat completion response. type: object + StreamedChatCompletionResponse: + additionalProperties: false + properties: + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. + type: string + text_delta: + type: string + tool_call: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + required: + - text_delta + title: Streamed chat completion response. + type: object StreamedCompletionResponse: additionalProperties: false properties: @@ -362,7 +547,6 @@ components: type: string required: - text_delta - - stop_reason title: streamed completion response. type: object URL: @@ -377,15 +561,24 @@ jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema openapi: 3.1.0 paths: /agentic/system/execute: - get: + post: parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AgenticSystemExecuteRequest' + required: true responses: '200': content: application/json: schema: - type: string - description: OK + oneOf: + - $ref: '#/components/schemas/AgenticSystemExecuteResponse' + - $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse' + description: Normal chat completion response. **OR** Streamed chat completion + response. tags: - AgenticSystem /chat_completion: @@ -434,17 +627,17 @@ security: servers: - url: http://llama.meta.com tags: -- name: AgenticSystem - name: Inference +- name: AgenticSystem +- description: + name: AgenticSystemExecuteRequest - description: 'Attachments are used to refer to external resources, such as images, videos, audio, etc. ' name: Attachment -- description: - name: ChatCompletionRequest - description: name: Message - description: @@ -452,6 +645,21 @@ tags: - description: 'Normal chat completion response. + ' + name: AgenticSystemExecuteResponse +- description: 'Streamed chat completion response. + + + ' + name: StreamedAgenticSystemExecuteResponse +- description: + name: ChatCompletionRequest +- description: 'Normal chat completion response. + + ' name: ChatCompletionResponse - description: 'Streamed chat completion response. @@ -481,12 +689,15 @@ x-tagGroups: - Inference - name: Types tags: + - AgenticSystemExecuteRequest + - AgenticSystemExecuteResponse - Attachment - ChatCompletionRequest - ChatCompletionResponse - CompletionRequest - CompletionResponse - Message + - StreamedAgenticSystemExecuteResponse - StreamedChatCompletionResponse - StreamedCompletionResponse - URL