more definitions

This commit is contained in:
Ashwin Bharambe 2024-07-08 16:35:28 -07:00
parent 722d20c6de
commit 6e4586ba7a
3 changed files with 775 additions and 178 deletions

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Union
from typing import Any, Dict, List, Optional, Protocol, Set, Union
import yaml
@ -45,16 +45,6 @@ class Role(Enum):
tool = "tool"
class StopReason(Enum):
"""
Stop reasons are used to indicate why the model stopped generating text.
"""
not_stopped = "not_stopped"
finished_ok = "finished_ok"
max_tokens = "max_tokens"
@dataclass
class ToolCall:
"""
@ -77,6 +67,28 @@ class ToolDefinition:
parameters: Dict[str, Any]
# TODO: we need to document the parameters for the tool calls
class BuiltinTool(Enum):
    """
    Builtin tools are tools the model is natively aware of and was potentially fine-tuned with.
    """

    # NOTE: the docstring above is reproduced verbatim as the `title` of this
    # enum in the generated OpenAPI spec, so edit it with that output in mind.

    # Each member's value is the member name spelled as a string.
    web_search = "web_search"
    math = "math"
    image_gen = "image_gen"
    code_interpreter = "code_interpreter"
class StopReason(Enum):
    """
    Stop reasons are used to indicate why the model stopped generating text.
    """

    # NOTE: the docstring above is reproduced verbatim as the `title` of this
    # enum in the generated OpenAPI spec.

    # Each member's value is the member name spelled as a string.
    not_stopped = "not_stopped"
    finished_ok = "finished_ok"
    max_tokens = "max_tokens"
@json_schema_type
@dataclass
class Message:
@ -85,9 +97,6 @@ class Message:
# input to the model or output from the model
content: Content
# zero-shot tool definitions as input to the model
tool_definitions: List[ToolDefinition] = field(default_factory=list)
# output from the model
tool_calls: List[ToolCall] = field(default_factory=list)
@ -95,45 +104,6 @@ class Message:
tool_responses: List[ToolResponse] = field(default_factory=list)
@json_schema_type
@dataclass
class CompletionResponse:
"""Normal completion response."""
content: Content
stop_reason: StopReason
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedCompletionResponse:
"""streamed completion response."""
text_delta: str
stop_reason: StopReason
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class ChatCompletionResponse:
"""Normal chat completion response."""
content: Content
stop_reason: StopReason
tool_calls: List[ToolCall] = field(default_factory=list)
logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedChatCompletionResponse:
"""Streamed chat completion response."""
text_delta: str
stop_reason: StopReason
tool_call: Optional[ToolCall] = None
@dataclass
class SamplingParams:
temperature: float = 0.0
@ -165,16 +135,69 @@ class CompletionRequest:
@json_schema_type
@dataclass
class ChatCompletionRequest:
class CompletionResponse:
    """Normal completion response."""

    # Payload of the response; the only field without a default.
    content: Content

    # Left as None when no stop reason is supplied.
    stop_reason: Optional[StopReason] = None

    # Optional string-keyed mapping; its inner structure is not constrained here.
    logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedCompletionResponse:
    """streamed completion response."""

    # Text carried by this streamed response; the only field without a default.
    text_delta: str

    # Left as None when no stop reason is supplied.
    stop_reason: Optional[StopReason] = None

    # Optional string-keyed mapping; its inner structure is not constrained here.
    logprobs: Optional[Dict[str, Any]] = None
@dataclass
class ChatCompletionRequestCommon:
    # Fields shared by the chat-style request types; ChatCompletionRequest and
    # AgenticSystemExecuteRequest both derive from this class.
    # (Documented with comments rather than a docstring: class docstrings in
    # this module show up as schema titles in the generated spec.)

    message: Message

    # A fresh list per instance. The previous `= None` default contradicted the
    # List[Message] annotation and the "array" type published in the spec.
    # NOTE(review): callers that tested `message_history is None` should test
    # for emptiness instead — confirm none rely on the None sentinel.
    message_history: List[Message] = field(default_factory=list)

    model: InstructModel = InstructModel.llama3_8b_chat

    # Build a new SamplingParams per request. A class-level `SamplingParams()`
    # instance would be shared (and mutable) across every request, and
    # Python 3.11+ dataclasses reject such an unhashable default outright.
    sampling_params: SamplingParams = field(default_factory=SamplingParams)

    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )
@json_schema_type
@dataclass
class ChatCompletionRequest(ChatCompletionRequestCommon):
    # Chat completion request: the common chat fields plus the three options
    # declared below.

    # NOTE(review): presumably 0 means "no explicit cap" — confirm with the
    # implementation; nothing in this file defines the meaning.
    max_tokens: int = 0

    # Defaults to False; the Inference protocol's return type covers both the
    # normal and the streamed response classes.
    stream: bool = False

    # Flag only; the log-probability data itself lives on the response types.
    logprobs: bool = False
@json_schema_type
@dataclass
class ChatCompletionResponse:
    """Normal chat completion response."""

    # Payload of the response; the only field without a default.
    content: Content

    # note: multiple tool calls can be generated in a single response
    tool_calls: List[ToolCall] = field(default_factory=list)

    # Left as None when no stop reason is supplied.
    stop_reason: Optional[StopReason] = None

    # Optional string-keyed mapping; its inner structure is not constrained here.
    logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedChatCompletionResponse:
    """Streamed chat completion response."""

    # Text carried by this streamed response; the only field without a default.
    text_delta: str

    # Left as None when no stop reason is supplied.
    stop_reason: Optional[StopReason] = None

    # At most one tool call per streamed response (the non-streamed response
    # carries a list instead).
    tool_call: Optional[ToolCall] = None
class Inference(Protocol):
def post_completion(
@ -188,19 +211,41 @@ class Inference(Protocol):
) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
@json_schema_type
@dataclass
class AgenticSystemExecuteRequest(ChatCompletionRequestCommon):
    # Request for the agentic-system execute endpoint: the common chat fields
    # plus the two options declared below.

    # Set of tool names, empty by default.
    # NOTE(review): presumably the tools the system may run itself, as opposed
    # to merely advertising them via `available_tools` — confirm.
    executable_tools: Set[str] = field(default_factory=set)

    # Defaults to False; the endpoint's return type covers both the normal and
    # the streamed response classes.
    stream: bool = False
@json_schema_type
@dataclass
class AgenticSystemExecuteRequest:
message: Message
message_history: List[Message] = None
model: InstructModel = InstructModel.llama3_8b_chat
sampling_params: SamplingParams = SamplingParams()
class AgenticSystemExecuteResponse:
    """Normal chat completion response."""

    # NOTE(review): the docstring is identical to ChatCompletionResponse's and
    # is what the generated spec shows as this schema's title — consider
    # giving this class its own wording.

    # Payload of the response.
    content: Content

    # NOTE(review): required here, whereas ChatCompletionResponse declares
    # stop_reason as Optional with a None default — confirm this is intended.
    stop_reason: StopReason

    tool_calls: List[ToolCall] = field(default_factory=list)

    # Optional string-keyed mapping; its inner structure is not constrained here.
    logprobs: Optional[Dict[str, Any]] = None
@json_schema_type
@dataclass
class StreamedAgenticSystemExecuteResponse:
    """Streamed chat completion response."""

    # NOTE(review): the docstring is identical to
    # StreamedChatCompletionResponse's and is what the generated spec shows as
    # this schema's title — consider giving this class its own wording.

    # Text carried by this streamed response.
    text_delta: str

    # NOTE(review): required here, whereas StreamedChatCompletionResponse
    # declares stop_reason as Optional with a None default — confirm.
    stop_reason: StopReason

    # At most one tool call per streamed response.
    tool_call: Optional[ToolCall] = None
class AgenticSystem(Protocol):
@webmethod(route="/agentic/system/execute")
def create_agentic_system_execute(self,) -> str: ...
# Protocol stub (no body): accepts an AgenticSystemExecuteRequest and is typed
# to return either the normal or the streamed execute response.
def create_agentic_system_execute(
    self,
    request: AgenticSystemExecuteRequest,
) -> Union[
    AgenticSystemExecuteResponse,
    StreamedAgenticSystemExecuteResponse,
]: ...
# Combined protocol: inherits every method of Inference and AgenticSystem and
# declares nothing of its own.
class Endpoint(Inference, AgenticSystem):
    ...

View file

@ -30,14 +30,21 @@
],
"paths": {
"/agentic/system/execute": {
"get": {
"post": {
"responses": {
"200": {
"description": "OK",
"description": "Normal chat completion response. **OR** Streamed chat completion response.",
"content": {
"application/json": {
"schema": {
"type": "string"
"oneOf": [
{
"$ref": "#/components/schemas/AgenticSystemExecuteResponse"
},
{
"$ref": "#/components/schemas/StreamedAgenticSystemExecuteResponse"
}
]
}
}
}
@ -46,7 +53,17 @@
"tags": [
"AgenticSystem"
],
"parameters": []
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AgenticSystemExecuteRequest"
}
}
},
"required": true
}
}
},
"/chat_completion": {
@ -127,24 +144,7 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
"Attachment": {
"type": "object",
"properties": {
"url": {
"$ref": "#/components/schemas/URL"
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"url",
"mime_type"
],
"title": "Attachments are used to refer to external resources, such as images, videos, audio, etc."
},
"ChatCompletionRequest": {
"AgenticSystemExecuteRequest": {
"type": "object",
"properties": {
"message": {
@ -192,17 +192,71 @@
"top_k"
]
},
"max_tokens": {
"type": "integer",
"default": 0
"available_tools": {
"type": "array",
"items": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
],
"title": "Builtin tools are tools the model is natively aware of and was potentially fine-tuned with."
},
{
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"parameters"
]
}
]
}
},
"executable_tools": {
"type": "array",
"items": {
"type": "string"
},
"uniqueItems": true
},
"stream": {
"type": "boolean",
"default": false
},
"logprobs": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
@ -211,11 +265,28 @@
"message_history",
"model",
"sampling_params",
"max_tokens",
"stream",
"logprobs"
"available_tools",
"executable_tools",
"stream"
]
},
"Attachment": {
"type": "object",
"properties": {
"url": {
"$ref": "#/components/schemas/URL"
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"url",
"mime_type"
],
"title": "Attachments are used to refer to external resources, such as images, videos, audio, etc."
},
"Message": {
"type": "object",
"properties": {
@ -251,47 +322,6 @@
}
]
},
"tool_definitions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"parameters"
]
}
},
"tool_calls": {
"type": "array",
"items": {
@ -358,7 +388,6 @@
"required": [
"role",
"content",
"tool_definitions",
"tool_calls",
"tool_responses"
]
@ -368,7 +397,7 @@
"format": "uri",
"pattern": "^(https?://|file://|data:)"
},
"ChatCompletionResponse": {
"AgenticSystemExecuteResponse": {
"type": "object",
"properties": {
"content": {
@ -479,7 +508,7 @@
],
"title": "Normal chat completion response."
},
"StreamedChatCompletionResponse": {
"StreamedAgenticSystemExecuteResponse": {
"type": "object",
"properties": {
"text_delta": {
@ -541,6 +570,305 @@
],
"title": "Streamed chat completion response."
},
"ChatCompletionRequest": {
"type": "object",
"properties": {
"message": {
"$ref": "#/components/schemas/Message"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
],
"default": "llama3_8b_chat"
},
"sampling_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number",
"default": 0.0
},
"strategy": {
"type": "string",
"default": "greedy"
},
"top_p": {
"type": "number",
"default": 0.95
},
"top_k": {
"type": "integer",
"default": 0
}
},
"additionalProperties": false,
"required": [
"temperature",
"strategy",
"top_p",
"top_k"
]
},
"available_tools": {
"type": "array",
"items": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
],
"title": "Builtin tools are tools the model is natively aware of and was potentially fine-tuned with."
},
{
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"parameters"
]
}
]
}
},
"max_tokens": {
"type": "integer",
"default": 0
},
"stream": {
"type": "boolean",
"default": false
},
"logprobs": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"message",
"message_history",
"model",
"sampling_params",
"available_tools",
"max_tokens",
"stream",
"logprobs"
]
},
"ChatCompletionResponse": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
}
]
}
}
]
},
"tool_calls": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content",
"tool_calls"
],
"title": "Normal chat completion response."
},
"StreamedChatCompletionResponse": {
"type": "object",
"properties": {
"text_delta": {
"type": "string"
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"tool_call": {
"type": "object",
"properties": {
"tool_name": {
"type": "string"
},
"arguments": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"arguments"
],
"title": "A tool call is a request to a tool."
}
},
"additionalProperties": false,
"required": [
"text_delta"
],
"title": "Streamed chat completion response."
},
"CompletionRequest": {
"type": "object",
"properties": {
@ -689,8 +1017,7 @@
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
"content"
],
"title": "Normal completion response."
},
@ -737,8 +1064,7 @@
},
"additionalProperties": false,
"required": [
"text_delta",
"stop_reason"
"text_delta"
],
"title": "streamed completion response."
}
@ -751,20 +1077,20 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "AgenticSystem"
},
{
"name": "Inference"
"name": "AgenticSystemExecuteRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteRequest\" />"
},
{
"name": "Attachment",
"description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
},
{
"name": "ChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
},
{
"name": "Message",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Message\" />"
@ -773,6 +1099,18 @@
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "AgenticSystemExecuteResponse",
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemExecuteResponse\" />"
},
{
"name": "StreamedAgenticSystemExecuteResponse",
"description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedAgenticSystemExecuteResponse\" />"
},
{
"name": "ChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
},
{
"name": "ChatCompletionResponse",
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
@ -805,12 +1143,15 @@
{
"name": "Types",
"tags": [
"AgenticSystemExecuteRequest",
"AgenticSystemExecuteResponse",
"Attachment",
"ChatCompletionRequest",
"ChatCompletionResponse",
"CompletionRequest",
"CompletionResponse",
"Message",
"StreamedAgenticSystemExecuteResponse",
"StreamedChatCompletionResponse",
"StreamedCompletionResponse",
"URL"

View file

@ -1,6 +1,147 @@
components:
responses: {}
schemas:
AgenticSystemExecuteRequest:
additionalProperties: false
properties:
available_tools:
items:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
title: Builtin tools are tools the model is natively aware of and was
potentially fine-tuned with.
type: string
- additionalProperties: false
properties:
parameters:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- parameters
type: object
type: array
executable_tools:
items:
type: string
type: array
uniqueItems: true
message:
$ref: '#/components/schemas/Message'
message_history:
items:
$ref: '#/components/schemas/Message'
type: array
model:
default: llama3_8b_chat
enum:
- llama3_8b_chat
- llama3_70b_chat
type: string
sampling_params:
additionalProperties: false
properties:
strategy:
default: greedy
type: string
temperature:
default: 0.0
type: number
top_k:
default: 0
type: integer
top_p:
default: 0.95
type: number
required:
- temperature
- strategy
- top_p
- top_k
type: object
stream:
default: false
type: boolean
required:
- message
- message_history
- model
- sampling_params
- available_tools
- executable_tools
- stream
type: object
AgenticSystemExecuteResponse:
additionalProperties: false
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
logprobs:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
tool_calls:
items:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
type: array
required:
- content
- stop_reason
- tool_calls
title: Normal chat completion response.
type: object
Attachment:
additionalProperties: false
properties:
@ -17,6 +158,36 @@ components:
ChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
title: Builtin tools are tools the model is natively aware of and was
potentially fine-tuned with.
type: string
- additionalProperties: false
properties:
parameters:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- parameters
type: object
type: array
logprobs:
default: false
type: boolean
@ -64,6 +235,7 @@ components:
- message_history
- model
- sampling_params
- available_tools
- max_tokens
- stream
- logprobs
@ -122,7 +294,6 @@ components:
type: array
required:
- content
- stop_reason
- tool_calls
title: Normal chat completion response.
type: object
@ -214,7 +385,6 @@ components:
type: string
required:
- content
- stop_reason
title: Normal completion response.
type: object
Message:
@ -258,27 +428,6 @@ components:
title: A tool call is a request to a tool.
type: object
type: array
tool_definitions:
items:
additionalProperties: false
properties:
parameters:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- parameters
type: object
type: array
tool_responses:
items:
additionalProperties: false
@ -295,11 +444,10 @@ components:
required:
- role
- content
- tool_definitions
- tool_calls
- tool_responses
type: object
StreamedChatCompletionResponse:
StreamedAgenticSystemExecuteResponse:
additionalProperties: false
properties:
stop_reason:
@ -337,6 +485,43 @@ components:
- stop_reason
title: Streamed chat completion response.
type: object
StreamedChatCompletionResponse:
additionalProperties: false
properties:
stop_reason:
enum:
- not_stopped
- finished_ok
- max_tokens
title: Stop reasons are used to indicate why the model stopped generating
text.
type: string
text_delta:
type: string
tool_call:
additionalProperties: false
properties:
arguments:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
type: string
required:
- tool_name
- arguments
title: A tool call is a request to a tool.
type: object
required:
- text_delta
title: Streamed chat completion response.
type: object
StreamedCompletionResponse:
additionalProperties: false
properties:
@ -362,7 +547,6 @@ components:
type: string
required:
- text_delta
- stop_reason
title: streamed completion response.
type: object
URL:
@ -377,15 +561,24 @@ jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
openapi: 3.1.0
paths:
/agentic/system/execute:
get:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/AgenticSystemExecuteRequest'
required: true
responses:
'200':
content:
application/json:
schema:
type: string
description: OK
oneOf:
- $ref: '#/components/schemas/AgenticSystemExecuteResponse'
- $ref: '#/components/schemas/StreamedAgenticSystemExecuteResponse'
description: Normal chat completion response. **OR** Streamed chat completion
response.
tags:
- AgenticSystem
/chat_completion:
@ -434,17 +627,17 @@ security:
servers:
- url: http://llama.meta.com
tags:
- name: AgenticSystem
- name: Inference
- name: AgenticSystem
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteRequest"
/>
name: AgenticSystemExecuteRequest
- description: 'Attachments are used to refer to external resources, such as images,
videos, audio, etc.
<SchemaDefinition schemaRef="#/components/schemas/Attachment" />'
name: Attachment
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Message" />
name: Message
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
@ -452,6 +645,21 @@ tags:
- description: 'Normal chat completion response.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponse"
/>'
name: AgenticSystemExecuteResponse
- description: 'Streamed chat completion response.
<SchemaDefinition schemaRef="#/components/schemas/StreamedAgenticSystemExecuteResponse"
/>'
name: StreamedAgenticSystemExecuteResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: 'Normal chat completion response.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
name: ChatCompletionResponse
- description: 'Streamed chat completion response.
@ -481,12 +689,15 @@ x-tagGroups:
- Inference
- name: Types
tags:
- AgenticSystemExecuteRequest
- AgenticSystemExecuteResponse
- Attachment
- ChatCompletionRequest
- ChatCompletionResponse
- CompletionRequest
- CompletionResponse
- Message
- StreamedAgenticSystemExecuteResponse
- StreamedChatCompletionResponse
- StreamedCompletionResponse
- URL