This commit is contained in:
Ashwin Bharambe 2024-07-10 23:33:57 -07:00
parent ee86f2c75f
commit 7cade3acc3
3 changed files with 721 additions and 271 deletions

View file

@ -80,15 +80,12 @@ class CompletionResponseStreamChunk:
@json_schema_type
@dataclass
class ChatCompletionRequest:
message: Message
model: InstructModel
message_history: List[Message] = None
dialog: Dialog
sampling_params: SamplingParams = SamplingParams()
# zero-shot tool definitions as input to the model
available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
default_factory=list
)
available_tools: List[ToolDefinition] = field(default_factory=list)
max_tokens: int = 0
stream: bool = False
@ -119,6 +116,30 @@ class ChatCompletionResponseStreamChunk:
tool_call: Optional[ToolCall] = None
@json_schema_type
@dataclass
class BatchCompletionRequest:
model: PretrainedModel
content_batch: List[Content]
sampling_params: SamplingParams = SamplingParams()
max_tokens: int = 0
logprobs: bool = False
@json_schema_type
@dataclass
class BatchChatCompletionRequest:
model: InstructModel
batch_dialogs: List[Dialog]
sampling_params: SamplingParams = SamplingParams()
# zero-shot tool definitions as input to the model
available_tools: List[ToolDefinition] = field(default_factory=list)
max_tokens: int = 0
logprobs: bool = False
class Inference(Protocol):
def post_completion(
@ -131,35 +152,6 @@ class Inference(Protocol):
request: ChatCompletionRequest,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
@json_schema_type
@dataclass
class BatchCompletionRequest:
content_batch: List[Content]
model: PretrainedModel
sampling_params: SamplingParams = SamplingParams()
max_tokens: int = 0
logprobs: bool = False
@json_schema_type
@dataclass
class BatchChatCompletionRequest:
model: InstructModel
batch_messages: List[Dialog]
sampling_params: SamplingParams = SamplingParams()
# zero-shot tool definitions as input to the model
available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
default_factory=list
)
max_tokens: int = 0
logprobs: bool = False
class BatchInference(Protocol):
"""Batch inference calls"""
def post_batch_completion(
self,
request: BatchCompletionRequest,
@ -302,8 +294,7 @@ class MemoryBanks(Protocol):
@dataclass
class KPromptGenerations:
prompt: Message
message_history: List[Message]
dialog: Dialog
k_generations: List[Message]

View file

@ -386,6 +386,66 @@
]
}
},
"/batch_chat_completion": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/jsonl": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionResponse"
}
}
}
}
},
"tags": [
"Inference"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchChatCompletionRequest"
}
}
},
"required": true
}
}
},
"/batch_completion": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/jsonl": {
"schema": {
"$ref": "#/components/schemas/CompletionResponse"
}
}
}
}
},
"tags": [
"Inference"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchCompletionRequest"
}
}
},
"required": true
}
}
},
"/chat_completion": {
"post": {
"responses": {
@ -1770,12 +1830,9 @@
],
"title": "Stream of logs from a finetuning job."
},
"ChatCompletionRequest": {
"BatchChatCompletionRequest": {
"type": "object",
"properties": {
"message": {
"$ref": "#/components/schemas/Message"
},
"model": {
"type": "string",
"enum": [
@ -1783,10 +1840,10 @@
"llama3_70b_chat"
]
},
"message_history": {
"batch_dialogs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
"$ref": "#/components/schemas/Dialog"
}
},
"sampling_params": {
@ -1820,80 +1877,67 @@
"available_tools": {
"type": "array",
"items": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
]
},
{
"type": "object",
"properties": {
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
]
},
{
"type": "string"
}
"type": "object",
"properties": {
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
]
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"input_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
},
"output_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
{
"type": "string"
}
},
"additionalProperties": false,
"required": [
"tool_name",
"input_shields",
"output_shields"
]
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"input_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
},
"output_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"input_shields",
"output_shields"
]
}
},
@ -1901,10 +1945,6 @@
"type": "integer",
"default": 0
},
"stream": {
"type": "boolean",
"default": false
},
"logprobs": {
"type": "boolean",
"default": false
@ -1912,16 +1952,33 @@
},
"additionalProperties": false,
"required": [
"message",
"model",
"message_history",
"batch_dialogs",
"sampling_params",
"available_tools",
"max_tokens",
"stream",
"logprobs"
]
},
"Dialog": {
"type": "object",
"properties": {
"message": {
"$ref": "#/components/schemas/Message"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
}
},
"additionalProperties": false,
"required": [
"message",
"message_history"
]
},
"ChatCompletionResponse": {
"type": "object",
"properties": {
@ -2032,6 +2089,287 @@
],
"title": "Normal chat completion response."
},
"BatchCompletionRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"enum": [
"llama3_8b",
"llama3_70b"
]
},
"content_batch": {
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
}
]
}
}
]
}
},
"sampling_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number",
"default": 0.0
},
"strategy": {
"type": "string",
"default": "greedy"
},
"top_p": {
"type": "number",
"default": 0.95
},
"top_k": {
"type": "integer",
"default": 0
}
},
"additionalProperties": false,
"required": [
"temperature",
"strategy",
"top_p",
"top_k"
]
},
"max_tokens": {
"type": "integer",
"default": 0
},
"logprobs": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"model",
"content_batch",
"sampling_params",
"max_tokens",
"logprobs"
]
},
"CompletionResponse": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
}
]
}
}
]
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content"
],
"title": "Normal completion response."
},
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"enum": [
"llama3_8b_chat",
"llama3_70b_chat"
]
},
"dialog": {
"$ref": "#/components/schemas/Dialog"
},
"sampling_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number",
"default": 0.0
},
"strategy": {
"type": "string",
"default": "greedy"
},
"top_p": {
"type": "number",
"default": 0.95
},
"top_k": {
"type": "integer",
"default": 0
}
},
"additionalProperties": false,
"required": [
"temperature",
"strategy",
"top_p",
"top_k"
]
},
"available_tools": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"web_search",
"math",
"image_gen",
"code_interpreter"
]
},
{
"type": "string"
}
]
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"input_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
},
"output_shields": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ShieldConfig"
}
}
},
"additionalProperties": false,
"required": [
"tool_name",
"input_shields",
"output_shields"
]
}
},
"max_tokens": {
"type": "integer",
"default": 0
},
"stream": {
"type": "boolean",
"default": false
},
"logprobs": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"model",
"dialog",
"sampling_params",
"available_tools",
"max_tokens",
"stream",
"logprobs"
]
},
"ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
@ -2177,73 +2515,6 @@
"logprobs"
]
},
"CompletionResponse": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
},
{
"type": "array",
"items": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/Attachment"
}
]
}
}
]
},
"stop_reason": {
"type": "string",
"enum": [
"not_stopped",
"finished_ok",
"max_tokens"
],
"title": "Stop reasons are used to indicate why the model stopped generating text."
},
"logprobs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content"
],
"title": "Normal completion response."
},
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
@ -2409,14 +2680,8 @@
"items": {
"type": "object",
"properties": {
"prompt": {
"$ref": "#/components/schemas/Message"
},
"message_history": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
"dialog": {
"$ref": "#/components/schemas/Dialog"
},
"k_generations": {
"type": "array",
@ -2427,8 +2692,7 @@
},
"additionalProperties": false,
"required": [
"prompt",
"message_history",
"dialog",
"k_generations"
]
}
@ -2738,14 +3002,11 @@
],
"tags": [
{
"name": "Inference"
"name": "RewardScoring"
},
{
"name": "MemoryBanks"
},
{
"name": "AgenticSystem"
},
{
"name": "SyntheticDataGeneration"
},
@ -2753,10 +3014,13 @@
"name": "Finetuning"
},
{
"name": "Datasets"
"name": "AgenticSystem"
},
{
"name": "RewardScoring"
"name": "Inference"
},
{
"name": "Datasets"
},
{
"name": "ShieldConfig",
@ -2823,13 +3087,29 @@
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
},
{
"name": "ChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
"name": "BatchChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
},
{
"name": "Dialog",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/Dialog\" />"
},
{
"name": "ChatCompletionResponse",
"description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
},
{
"name": "BatchCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchCompletionRequest\" />"
},
{
"name": "CompletionResponse",
"description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
},
{
"name": "ChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
},
{
"name": "ChatCompletionResponseStreamChunk",
"description": "Streamed chat completion response. The actual response is a series of such objects.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
@ -2838,10 +3118,6 @@
"name": "CompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionRequest\" />"
},
{
"name": "CompletionResponse",
"description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
},
{
"name": "CompletionResponseStreamChunk",
"description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
@ -2910,6 +3186,8 @@
"AgenticSystemExecuteResponseStreamChunk",
"AgenticSystemTurn",
"Attachment",
"BatchChatCompletionRequest",
"BatchCompletionRequest",
"ChatCompletionRequest",
"ChatCompletionResponse",
"ChatCompletionResponseStreamChunk",
@ -2918,6 +3196,7 @@
"CompletionResponseStreamChunk",
"CreateDatasetRequest",
"Dataset",
"Dialog",
"FinetuningJobArtifactsResponse",
"FinetuningJobLogStream",
"FinetuningJobStatusResponse",

View file

@ -433,52 +433,49 @@ components:
title: Attachments are used to refer to external resources, such as images,
videos, audio, etc.
type: object
ChatCompletionRequest:
BatchChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
type: string
- additionalProperties: false
properties:
input_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
output_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
parameters:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
additionalProperties: false
properties:
input_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
output_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
parameters:
additionalProperties:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
type: string
- type: 'null'
- type: boolean
- type: number
- type: string
required:
- tool_name
- input_shields
- output_shields
type: object
- type: array
- type: object
type: object
tool_name:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
type: string
- type: string
required:
- tool_name
- input_shields
- output_shields
type: object
type: array
batch_dialogs:
items:
$ref: '#/components/schemas/Dialog'
type: array
logprobs:
default: false
@ -486,12 +483,141 @@ components:
max_tokens:
default: 0
type: integer
message:
$ref: '#/components/schemas/Message'
message_history:
model:
enum:
- llama3_8b_chat
- llama3_70b_chat
type: string
sampling_params:
additionalProperties: false
properties:
strategy:
default: greedy
type: string
temperature:
default: 0.0
type: number
top_k:
default: 0
type: integer
top_p:
default: 0.95
type: number
required:
- temperature
- strategy
- top_p
- top_k
type: object
required:
- model
- batch_dialogs
- sampling_params
- available_tools
- max_tokens
- logprobs
type: object
BatchCompletionRequest:
additionalProperties: false
properties:
content_batch:
items:
$ref: '#/components/schemas/Message'
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
type: array
logprobs:
default: false
type: boolean
max_tokens:
default: 0
type: integer
model:
enum:
- llama3_8b
- llama3_70b
type: string
sampling_params:
additionalProperties: false
properties:
strategy:
default: greedy
type: string
temperature:
default: 0.0
type: number
top_k:
default: 0
type: integer
top_p:
default: 0.95
type: number
required:
- temperature
- strategy
- top_p
- top_k
type: object
required:
- model
- content_batch
- sampling_params
- max_tokens
- logprobs
type: object
ChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
additionalProperties: false
properties:
input_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
output_shields:
items:
$ref: '#/components/schemas/ShieldConfig'
type: array
parameters:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
tool_name:
oneOf:
- enum:
- web_search
- math
- image_gen
- code_interpreter
type: string
- type: string
required:
- tool_name
- input_shields
- output_shields
type: object
type: array
dialog:
$ref: '#/components/schemas/Dialog'
logprobs:
default: false
type: boolean
max_tokens:
default: 0
type: integer
model:
enum:
- llama3_8b_chat
@ -522,9 +648,8 @@ components:
default: false
type: boolean
required:
- message
- model
- message_history
- dialog
- sampling_params
- available_tools
- max_tokens
@ -785,6 +910,19 @@ components:
- metadata
title: Dataset to be used for training or evaluating language models.
type: object
Dialog:
additionalProperties: false
properties:
message:
$ref: '#/components/schemas/Message'
message_history:
items:
$ref: '#/components/schemas/Message'
type: array
required:
- message
- message_history
type: object
FinetuningJobArtifactsResponse:
additionalProperties: false
properties:
@ -1132,19 +1270,14 @@ components:
items:
additionalProperties: false
properties:
dialog:
$ref: '#/components/schemas/Dialog'
k_generations:
items:
$ref: '#/components/schemas/Message'
type: array
message_history:
items:
$ref: '#/components/schemas/Message'
type: array
prompt:
$ref: '#/components/schemas/Message'
required:
- prompt
- message_history
- dialog
- k_generations
type: object
type: array
@ -1327,6 +1460,42 @@ paths:
agent execution response.
tags:
- AgenticSystem
/batch_chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/ChatCompletionResponse'
description: OK
tags:
- Inference
/batch_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/CompletionResponse'
description: OK
tags:
- Inference
/chat_completion:
post:
parameters: []
@ -1659,13 +1828,13 @@ security:
servers:
- url: http://llama.meta.com
tags:
- name: Inference
- name: RewardScoring
- name: MemoryBanks
- name: AgenticSystem
- name: SyntheticDataGeneration
- name: Finetuning
- name: AgenticSystem
- name: Inference
- name: Datasets
- name: RewardScoring
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1733,14 +1902,27 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
name: FinetuningJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: ChatCompletionRequest
name: BatchChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Dialog" />
name: Dialog
- description: 'Normal chat completion response.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
name: ChatCompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
/>
name: BatchCompletionRequest
- description: 'Normal completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
name: CompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: 'Streamed chat completion response. The actual response is a series
of such objects.
@ -1751,11 +1933,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/>
name: CompletionRequest
- description: 'Normal completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
name: CompletionResponse
- description: 'streamed completion response.
@ -1828,6 +2005,8 @@ x-tagGroups:
- AgenticSystemExecuteResponseStreamChunk
- AgenticSystemTurn
- Attachment
- BatchChatCompletionRequest
- BatchCompletionRequest
- ChatCompletionRequest
- ChatCompletionResponse
- ChatCompletionResponseStreamChunk
@ -1836,6 +2015,7 @@ x-tagGroups:
- CompletionResponseStreamChunk
- CreateDatasetRequest
- Dataset
- Dialog
- FinetuningJobArtifactsResponse
- FinetuningJobLogStream
- FinetuningJobStatusResponse