updated chat completion

Hardik Shah 2024-06-26 15:25:52 -07:00
parent 749e50b288
commit eb81ad1ffd
2 changed files with 177 additions and 122 deletions

View file

@@ -1,107 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Single Inference API (chat_completion)
+  title: Chat Completion API
   version: 0.0.1
-components:
-  schemas:
-    Tool:
-      type: object
-      description: A tool that can be used by an agent to perform specific tasks.
-      properties:
-        name:
-          type: string
-          description: The name of the tool.
-        description:
-          type: string
-          description: A brief description of what the tool does and how it should be used.
-        parameters:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolParameter'
-        returnValue:
-          $ref: '#/components/schemas/ToolReturnValue'
-    ToolParameter:
-      type: object
-      properties:
-        type:
-          type: string
-          enum: [string, int, float, list, bool]
-          description: The data type of the parameter.
-        itemType:
-          type: string
-          description: The type of items in the parameter if it is a list.
-        description:
-          type: string
-          description: Details about what the parameter is used for and any constraints.
-    ToolReturnValue:
-      type: object
-      properties:
-        type:
-          type: string
-          enum: [string, int, float, list, bool]
-          description: The data type of the return value.
-        itemType:
-          type: string
-          description: The type of items in the return value if it is a list.
-        description:
-          type: string
-          description: Details about the significance of the return value.
-    Attachment:
-      type: object
-      properties:
-        uri:
-          type: string
-          description: URI of the attachment.
-        mime-type:
-          type: string
-          description: MIME type of the attachment.
-    Message:
-      type: object
-      properties:
-        role:
-          type: string
-          description: Role of the entity in the message.
-        text:
-          type: string
-          description: Text content of the message.
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-    Completion:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the completion.
-        role:
-          type: string
-          description: Role of the entity generating the completion.
-        text:
-          type: string
-          description: Text content of the completion.
-        attachments:
-          type: array
-          items:
-            $ref: '#/components/schemas/Attachment'
-        tokens:
-          type: array
-          items:
-            type: integer
-        logprobs:
-          type: array
-          items:
-            type: number
-        finish_reason:
-          type: string
-          description: Reason for completion termination.
-# TODO: Add `tool_choice` --
-# for eg. "auto": use model's guess, how to force to use particular tool, how to disbale inbuilt tools
 paths:
   /chat_completion/:
     post:
       summary: Submit a chat completion request
-      description: Submit a chat completion request
+      description: |
+        This endpoint allows clients to submit a chat completion request.
       requestBody:
         required: true
         content:
@@ -115,31 +21,13 @@ paths:
                     $ref: '#/components/schemas/Message'
                 model:
                   type: string
-                  description: Model identifier
-                logprobs:
-                  type: boolean
-                  description: Whether to include log probabilities in the output
-                max_tokens:
-                  type: integer
-                  description: Maximum number of tokens to generate
+                options:
+                  $ref: '#/components/schemas/Options'
                 n_completions:
                   type: integer
-                  description: Number of completions to generate
-                temperature:
-                  type: number
-                  format: float
-                  description: Temperature setting for the generation
-                top_p:
-                  type: number
-                  format: float
-                  description: Top p setting for the generation
-                tools:
-                  type: array
-                  items:
-                    $ref: '#/components/schemas/Tool'
       responses:
         '200':
-          description: Chat completion request processed successfully
+          description: Successful response
           content:
             application/json:
               schema:
@@ -147,11 +35,106 @@ paths:
                 properties:
                   id:
                     type: string
-                    description: Unique identifier for the completion request
-                  completions:
+                  candidates:
                     type: array
                     items:
                       $ref: '#/components/schemas/Completion'
-                  model:
+                  model_called:
                     type: string
-                    description: Model used for generating completions
+                  usage:
+                    $ref: '#/components/schemas/TokenUsage'
+components:
+  schemas:
+    Message:
+      type: object
+      properties:
+        role:
+          type: string
+        text:
+          type: string
+        attachments:
+          type: array
+          items:
+            $ref: '#/components/schemas/MediaAttachment'
+        eot:
+          type: boolean
+          description: "End of transmission flag."
+        tool_call:
+          type: boolean
+          description: "Indicates if it's a tool call - builtin, custom, or ipython."
+        is_complete:
+          type: boolean
+          description: "For streaming, indicates if the message is complete."
+        is_header_complete:
+          type: boolean
+          description: "For streaming, indicates if the header of the message is complete."
+        metadata:
+          type: object
+          additionalProperties: true
+          description: "Additional metadata as JSON."
+    MediaAttachment:
+      type: object
+      properties:
+        attachment_type:
+          $ref: '#/components/schemas/MediaAttachmentType'
+        data_type:
+          $ref: '#/components/schemas/MediaAttachmentDataType'
+        data:
+          type: string
+    MediaAttachmentType:
+      type: string
+      enum:
+        - image
+        - video
+        - audio
+        - text
+      description: "Type of media attachment."
+    MediaAttachmentDataType:
+      type: string
+      enum:
+        - raw_bytes
+        - filepath
+        - uri
+      description: "Data type of the media attachment."
+    Completion:
+      type: object
+      properties:
+        id:
+          type: string
+        message:
+          $ref: '#/components/schemas/Message'
+        tokens:
+          type: array
+          items:
+            type: integer
+        logprobs:
+          type: array
+          items:
+            type: number
+        finish_reason:
+          type: string
+          enum:
+            - stop
+            - safety
+            - max-length
+          description: "Reason for completion termination."
+    Options:
+      type: object
+      properties:
+        logprobs:
+          type: boolean
+        max_tokens:
+          type: integer
+        temperature:
+          type: number
+        top_p:
+          type: number
+    TokenUsage:
+      type: object
+      properties:
+        input_tokens:
+          type: integer
+        output_tokens:
+          type: integer
+        total_tokens:
+          type: integer
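
For illustration, a request/response exchange that conforms to the updated spec could look like the sketch below. This is not part of the commit; the base URL, model name, and field values are assumptions (the spec defines no servers section), shown as a Python sketch of the JSON wire format.

# Illustrative sketch only: POST /chat_completion/ with a body matching the
# request schema above. Base URL and model name are assumptions.
import requests

BASE_URL = "http://localhost:8000"  # assumed host; not defined by the spec

request_body = {
    "messages": [
        {
            "role": "user",
            "text": "What is the capital of France?",
            "attachments": [],
            "eot": True,
            "tool_call": False,
        }
    ],
    "model": "example-model",  # placeholder identifier
    "options": {"logprobs": False, "max_tokens": 128, "temperature": 0.7, "top_p": 0.9},
    "n_completions": 1,
}

resp = requests.post(f"{BASE_URL}/chat_completion/", json=request_body)
data = resp.json()

# Expected response shape per the schema above:
# {
#   "id": "...",
#   "candidates": [{"id": "...", "message": {...}, "tokens": [...],
#                   "logprobs": [...], "finish_reason": "stop"}],
#   "model_called": "example-model",
#   "usage": {"input_tokens": ..., "output_tokens": ..., "total_tokens": ...}
# }
print(data["candidates"][0]["message"]["text"])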

View file

@@ -0,0 +1,72 @@
# Simple bullet form for ease of reading and iteration.
# Use LLMs to translate this to an OpenAPI spec.

== Schema ==

Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin, custom, or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json

MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str

MediaAttachmentType:  # enum [image, video, audio, text (or file)]
MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]

Completion:
  id: str
  message: Message
  tokens: List[int]
  logprobs: List[float]
  finish_reason: str  # enum (stop, safety, max-length, etc.)

Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  # TODO: Get more options from metagen

TokenUsage:
  input_tokens: int
  output_tokens: int
  total_tokens: int
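
For reference, a rough Python (dataclass) rendering of the schema above. Field names follow the bullet form; container types, defaults, and the Enum classes are illustrative assumptions rather than part of the spec.

# Rough, assumed Python rendering of the schema section above.
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional


class MediaAttachmentType(Enum):
    IMAGE = "image"
    VIDEO = "video"
    AUDIO = "audio"
    TEXT = "text"  # or file


class MediaAttachmentDataType(Enum):
    RAW_BYTES = "raw_bytes"
    FILEPATH = "filepath"
    URI = "uri"


@dataclass
class MediaAttachment:
    attachment_type: MediaAttachmentType
    data_type: MediaAttachmentDataType
    data: str


@dataclass
class Message:
    role: str
    text: str
    attachments: List[MediaAttachment] = field(default_factory=list)
    eot: bool = False
    tool_call: bool = False  # builtin, custom, or ipython tool call
    # streaming flags
    is_complete: bool = True
    is_header_complete: bool = True
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Completion:
    id: str
    message: Message
    tokens: List[int]
    logprobs: List[float]
    finish_reason: str  # one of: stop, safety, max-length


@dataclass
class Options:
    logprobs: bool = False
    max_tokens: Optional[int] = None
    temperature: float = 1.0
    top_p: float = 1.0


@dataclass
class TokenUsage:
    input_tokens: int
    output_tokens: int
    total_tokens: int
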
== Callsite ==

callsite:
  chat_completion/
  request_type:
    post
  description:
    Submit a chat completion request.
  request:
    messages: List[Message]
    model: str
    options: Options
    n_completions: int
    # TODO: how to handle tooling control, if any?
    # Add `tools` and `tool_choice` --
    # e.g. "auto": use the model's guess
    # how to force use of a particular tool
    # how to disable built-in tools
    # tools: List[Tool]
    # tool_choice: Any
  response:
    id: str
    candidates: List[Completion]  # a list to account for when n_completions > 1
    model_called: str  # info on the model that produced this result
    usage: TokenUsage

# TODO
# callsite:
#   chat_completion_stream/
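
Building on the dataclass sketch after the Schema section, the callsite above could be wrapped by a thin client along these lines. The host, serialization details, and error handling are assumptions, and attachments with enum-typed fields would need explicit serialization (e.g. using .value) before sending.

# Assumed client-side wrapper for the callsite above. Requires the dataclasses
# from the schema sketch (Message, Options, Completion, TokenUsage) in scope.
from dataclasses import asdict
from typing import List, Tuple

import requests


def chat_completion(
    base_url: str,
    messages: List[Message],
    model: str,
    options: Options,
    n_completions: int = 1,
) -> Tuple[List[Completion], TokenUsage]:
    payload = {
        # Naive serialization: assumes attachments are empty or already
        # JSON-friendly; enum fields would need .value conversion.
        "messages": [asdict(m) for m in messages],
        "model": model,
        "options": asdict(options),
        "n_completions": n_completions,
    }
    resp = requests.post(f"{base_url}/chat_completion/", json=payload)
    resp.raise_for_status()
    body = resp.json()
    candidates = [
        Completion(
            id=c["id"],
            message=Message(**c["message"]),  # assumes keys match the dataclass
            tokens=c.get("tokens", []),
            logprobs=c.get("logprobs", []),
            finish_reason=c.get("finish_reason", "stop"),
        )
        for c in body["candidates"]
    ]
    usage = TokenUsage(**body["usage"])
    return candidates, usage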