From 7cade3acc390139fde1d839aa91aedf10a6f5821 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 10 Jul 2024 23:33:57 -0700 Subject: [PATCH] fixes --- source/api_definitions.py | 63 ++-- source/openapi.html | 621 +++++++++++++++++++++++++++----------- source/openapi.yaml | 308 +++++++++++++++---- 3 files changed, 721 insertions(+), 271 deletions(-) diff --git a/source/api_definitions.py b/source/api_definitions.py index d147cae6d..b6283c83b 100644 --- a/source/api_definitions.py +++ b/source/api_definitions.py @@ -80,15 +80,12 @@ class CompletionResponseStreamChunk: @json_schema_type @dataclass class ChatCompletionRequest: - message: Message model: InstructModel - message_history: List[Message] = None + dialog: Dialog sampling_params: SamplingParams = SamplingParams() # zero-shot tool definitions as input to the model - available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( - default_factory=list - ) + available_tools: List[ToolDefinition] = field(default_factory=list) max_tokens: int = 0 stream: bool = False @@ -119,6 +116,30 @@ class ChatCompletionResponseStreamChunk: tool_call: Optional[ToolCall] = None +@json_schema_type +@dataclass +class BatchCompletionRequest: + model: PretrainedModel + content_batch: List[Content] + sampling_params: SamplingParams = SamplingParams() + max_tokens: int = 0 + logprobs: bool = False + + +@json_schema_type +@dataclass +class BatchChatCompletionRequest: + model: InstructModel + batch_dialogs: List[Dialog] + sampling_params: SamplingParams = SamplingParams() + + # zero-shot tool definitions as input to the model + available_tools: List[ToolDefinition] = field(default_factory=list) + + max_tokens: int = 0 + logprobs: bool = False + + class Inference(Protocol): def post_completion( @@ -131,35 +152,6 @@ class Inference(Protocol): request: ChatCompletionRequest, ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... - -@json_schema_type -@dataclass -class BatchCompletionRequest: - content_batch: List[Content] - model: PretrainedModel - sampling_params: SamplingParams = SamplingParams() - max_tokens: int = 0 - logprobs: bool = False - - -@json_schema_type -@dataclass -class BatchChatCompletionRequest: - model: InstructModel - batch_messages: List[Dialog] - sampling_params: SamplingParams = SamplingParams() - - # zero-shot tool definitions as input to the model - available_tools: List[Union[BuiltinTool, ToolDefinition]] = field( - default_factory=list - ) - - max_tokens: int = 0 - logprobs: bool = False - - -class BatchInference(Protocol): - """Batch inference calls""" def post_batch_completion( self, request: BatchCompletionRequest, @@ -302,8 +294,7 @@ class MemoryBanks(Protocol): @dataclass class KPromptGenerations: - prompt: Message - message_history: List[Message] + dialog: Dialog k_generations: List[Message] diff --git a/source/openapi.html b/source/openapi.html index 11e18c18b..b61378bad 100644 --- a/source/openapi.html +++ b/source/openapi.html @@ -386,6 +386,66 @@ ] } }, + "/batch_chat_completion": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/jsonl": { + "schema": { + "$ref": "#/components/schemas/ChatCompletionResponse" + } + } + } + } + }, + "tags": [ + "Inference" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchChatCompletionRequest" + } + } + }, + "required": true + } + } + }, + "/batch_completion": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/jsonl": { + "schema": { + "$ref": "#/components/schemas/CompletionResponse" + } + } + } + } + }, + "tags": [ + "Inference" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchCompletionRequest" + } + } + }, + "required": true + } + } + }, "/chat_completion": { "post": { "responses": { @@ -1770,12 +1830,9 @@ ], "title": "Stream of logs from a finetuning job." }, - "ChatCompletionRequest": { + "BatchChatCompletionRequest": { "type": "object", "properties": { - "message": { - "$ref": "#/components/schemas/Message" - }, "model": { "type": "string", "enum": [ @@ -1783,10 +1840,10 @@ "llama3_70b_chat" ] }, - "message_history": { + "batch_dialogs": { "type": "array", "items": { - "$ref": "#/components/schemas/Message" + "$ref": "#/components/schemas/Dialog" } }, "sampling_params": { @@ -1820,80 +1877,67 @@ "available_tools": { "type": "array", "items": { - "oneOf": [ - { - "type": "string", - "enum": [ - "web_search", - "math", - "image_gen", - "code_interpreter" - ] - }, - { - "type": "object", - "properties": { - "tool_name": { - "oneOf": [ - { - "type": "string", - "enum": [ - "web_search", - "math", - "image_gen", - "code_interpreter" - ] - }, - { - "type": "string" - } + "type": "object", + "properties": { + "tool_name": { + "oneOf": [ + { + "type": "string", + "enum": [ + "web_search", + "math", + "image_gen", + "code_interpreter" ] }, - "parameters": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "input_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldConfig" - } - }, - "output_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldConfig" - } + { + "type": "string" } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "input_shields", - "output_shields" ] + }, + "parameters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "input_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldConfig" + } + }, + "output_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldConfig" + } } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "input_shields", + "output_shields" ] } }, @@ -1901,10 +1945,6 @@ "type": "integer", "default": 0 }, - "stream": { - "type": "boolean", - "default": false - }, "logprobs": { "type": "boolean", "default": false @@ -1912,16 +1952,33 @@ }, "additionalProperties": false, "required": [ - "message", "model", - "message_history", + "batch_dialogs", "sampling_params", "available_tools", "max_tokens", - "stream", "logprobs" ] }, + "Dialog": { + "type": "object", + "properties": { + "message": { + "$ref": "#/components/schemas/Message" + }, + "message_history": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + } + }, + "additionalProperties": false, + "required": [ + "message", + "message_history" + ] + }, "ChatCompletionResponse": { "type": "object", "properties": { @@ -2032,6 +2089,287 @@ ], "title": "Normal chat completion response." }, + "BatchCompletionRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": [ + "llama3_8b", + "llama3_70b" + ] + }, + "content_batch": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + } + ] + } + } + ] + } + }, + "sampling_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number", + "default": 0.0 + }, + "strategy": { + "type": "string", + "default": "greedy" + }, + "top_p": { + "type": "number", + "default": 0.95 + }, + "top_k": { + "type": "integer", + "default": 0 + } + }, + "additionalProperties": false, + "required": [ + "temperature", + "strategy", + "top_p", + "top_k" + ] + }, + "max_tokens": { + "type": "integer", + "default": 0 + }, + "logprobs": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "model", + "content_batch", + "sampling_params", + "max_tokens", + "logprobs" + ] + }, + "CompletionResponse": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/Attachment" + } + ] + } + } + ] + }, + "stop_reason": { + "type": "string", + "enum": [ + "not_stopped", + "finished_ok", + "max_tokens" + ], + "title": "Stop reasons are used to indicate why the model stopped generating text." + }, + "logprobs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "content" + ], + "title": "Normal completion response." + }, + "ChatCompletionRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": [ + "llama3_8b_chat", + "llama3_70b_chat" + ] + }, + "dialog": { + "$ref": "#/components/schemas/Dialog" + }, + "sampling_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number", + "default": 0.0 + }, + "strategy": { + "type": "string", + "default": "greedy" + }, + "top_p": { + "type": "number", + "default": 0.95 + }, + "top_k": { + "type": "integer", + "default": 0 + } + }, + "additionalProperties": false, + "required": [ + "temperature", + "strategy", + "top_p", + "top_k" + ] + }, + "available_tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tool_name": { + "oneOf": [ + { + "type": "string", + "enum": [ + "web_search", + "math", + "image_gen", + "code_interpreter" + ] + }, + { + "type": "string" + } + ] + }, + "parameters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "input_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldConfig" + } + }, + "output_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldConfig" + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "input_shields", + "output_shields" + ] + } + }, + "max_tokens": { + "type": "integer", + "default": 0 + }, + "stream": { + "type": "boolean", + "default": false + }, + "logprobs": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "model", + "dialog", + "sampling_params", + "available_tools", + "max_tokens", + "stream", + "logprobs" + ] + }, "ChatCompletionResponseStreamChunk": { "type": "object", "properties": { @@ -2177,73 +2515,6 @@ "logprobs" ] }, - "CompletionResponse": { - "type": "object", - "properties": { - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - } - ] - } - } - ] - }, - "stop_reason": { - "type": "string", - "enum": [ - "not_stopped", - "finished_ok", - "max_tokens" - ], - "title": "Stop reasons are used to indicate why the model stopped generating text." - }, - "logprobs": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "content" - ], - "title": "Normal completion response." - }, "CompletionResponseStreamChunk": { "type": "object", "properties": { @@ -2409,14 +2680,8 @@ "items": { "type": "object", "properties": { - "prompt": { - "$ref": "#/components/schemas/Message" - }, - "message_history": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - } + "dialog": { + "$ref": "#/components/schemas/Dialog" }, "k_generations": { "type": "array", @@ -2427,8 +2692,7 @@ }, "additionalProperties": false, "required": [ - "prompt", - "message_history", + "dialog", "k_generations" ] } @@ -2738,14 +3002,11 @@ ], "tags": [ { - "name": "Inference" + "name": "RewardScoring" }, { "name": "MemoryBanks" }, - { - "name": "AgenticSystem" - }, { "name": "SyntheticDataGeneration" }, @@ -2753,10 +3014,13 @@ "name": "Finetuning" }, { - "name": "Datasets" + "name": "AgenticSystem" }, { - "name": "RewardScoring" + "name": "Inference" + }, + { + "name": "Datasets" }, { "name": "ShieldConfig", @@ -2823,13 +3087,29 @@ "description": "Stream of logs from a finetuning job.\n\n" }, { - "name": "ChatCompletionRequest", - "description": "" + "name": "BatchChatCompletionRequest", + "description": "" + }, + { + "name": "Dialog", + "description": "" }, { "name": "ChatCompletionResponse", "description": "Normal chat completion response.\n\n" }, + { + "name": "BatchCompletionRequest", + "description": "" + }, + { + "name": "CompletionResponse", + "description": "Normal completion response.\n\n" + }, + { + "name": "ChatCompletionRequest", + "description": "" + }, { "name": "ChatCompletionResponseStreamChunk", "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n" @@ -2838,10 +3118,6 @@ "name": "CompletionRequest", "description": "" }, - { - "name": "CompletionResponse", - "description": "Normal completion response.\n\n" - }, { "name": "CompletionResponseStreamChunk", "description": "streamed completion response.\n\n" @@ -2910,6 +3186,8 @@ "AgenticSystemExecuteResponseStreamChunk", "AgenticSystemTurn", "Attachment", + "BatchChatCompletionRequest", + "BatchCompletionRequest", "ChatCompletionRequest", "ChatCompletionResponse", "ChatCompletionResponseStreamChunk", @@ -2918,6 +3196,7 @@ "CompletionResponseStreamChunk", "CreateDatasetRequest", "Dataset", + "Dialog", "FinetuningJobArtifactsResponse", "FinetuningJobLogStream", "FinetuningJobStatusResponse", diff --git a/source/openapi.yaml b/source/openapi.yaml index 8d0363936..da53c4a56 100644 --- a/source/openapi.yaml +++ b/source/openapi.yaml @@ -433,52 +433,49 @@ components: title: Attachments are used to refer to external resources, such as images, videos, audio, etc. type: object - ChatCompletionRequest: + BatchChatCompletionRequest: additionalProperties: false properties: available_tools: items: - oneOf: - - enum: - - web_search - - math - - image_gen - - code_interpreter - type: string - - additionalProperties: false - properties: - input_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - parameters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: + additionalProperties: false + properties: + input_shields: + items: + $ref: '#/components/schemas/ShieldConfig' + type: array + output_shields: + items: + $ref: '#/components/schemas/ShieldConfig' + type: array + parameters: + additionalProperties: oneOf: - - enum: - - web_search - - math - - image_gen - - code_interpreter - type: string + - type: 'null' + - type: boolean + - type: number - type: string - required: - - tool_name - - input_shields - - output_shields - type: object + - type: array + - type: object + type: object + tool_name: + oneOf: + - enum: + - web_search + - math + - image_gen + - code_interpreter + type: string + - type: string + required: + - tool_name + - input_shields + - output_shields + type: object + type: array + batch_dialogs: + items: + $ref: '#/components/schemas/Dialog' type: array logprobs: default: false @@ -486,12 +483,141 @@ components: max_tokens: default: 0 type: integer - message: - $ref: '#/components/schemas/Message' - message_history: + model: + enum: + - llama3_8b_chat + - llama3_70b_chat + type: string + sampling_params: + additionalProperties: false + properties: + strategy: + default: greedy + type: string + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number + required: + - temperature + - strategy + - top_p + - top_k + type: object + required: + - model + - batch_dialogs + - sampling_params + - available_tools + - max_tokens + - logprobs + type: object + BatchCompletionRequest: + additionalProperties: false + properties: + content_batch: items: - $ref: '#/components/schemas/Message' + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array type: array + logprobs: + default: false + type: boolean + max_tokens: + default: 0 + type: integer + model: + enum: + - llama3_8b + - llama3_70b + type: string + sampling_params: + additionalProperties: false + properties: + strategy: + default: greedy + type: string + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number + required: + - temperature + - strategy + - top_p + - top_k + type: object + required: + - model + - content_batch + - sampling_params + - max_tokens + - logprobs + type: object + ChatCompletionRequest: + additionalProperties: false + properties: + available_tools: + items: + additionalProperties: false + properties: + input_shields: + items: + $ref: '#/components/schemas/ShieldConfig' + type: array + output_shields: + items: + $ref: '#/components/schemas/ShieldConfig' + type: array + parameters: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + oneOf: + - enum: + - web_search + - math + - image_gen + - code_interpreter + type: string + - type: string + required: + - tool_name + - input_shields + - output_shields + type: object + type: array + dialog: + $ref: '#/components/schemas/Dialog' + logprobs: + default: false + type: boolean + max_tokens: + default: 0 + type: integer model: enum: - llama3_8b_chat @@ -522,9 +648,8 @@ components: default: false type: boolean required: - - message - model - - message_history + - dialog - sampling_params - available_tools - max_tokens @@ -785,6 +910,19 @@ components: - metadata title: Dataset to be used for training or evaluating language models. type: object + Dialog: + additionalProperties: false + properties: + message: + $ref: '#/components/schemas/Message' + message_history: + items: + $ref: '#/components/schemas/Message' + type: array + required: + - message + - message_history + type: object FinetuningJobArtifactsResponse: additionalProperties: false properties: @@ -1132,19 +1270,14 @@ components: items: additionalProperties: false properties: + dialog: + $ref: '#/components/schemas/Dialog' k_generations: items: $ref: '#/components/schemas/Message' type: array - message_history: - items: - $ref: '#/components/schemas/Message' - type: array - prompt: - $ref: '#/components/schemas/Message' required: - - prompt - - message_history + - dialog - k_generations type: object type: array @@ -1327,6 +1460,42 @@ paths: agent execution response. tags: - AgenticSystem + /batch_chat_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/BatchChatCompletionRequest' + required: true + responses: + '200': + content: + application/jsonl: + schema: + $ref: '#/components/schemas/ChatCompletionResponse' + description: OK + tags: + - Inference + /batch_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/BatchCompletionRequest' + required: true + responses: + '200': + content: + application/jsonl: + schema: + $ref: '#/components/schemas/CompletionResponse' + description: OK + tags: + - Inference /chat_completion: post: parameters: [] @@ -1659,13 +1828,13 @@ security: servers: - url: http://llama.meta.com tags: -- name: Inference +- name: RewardScoring - name: MemoryBanks -- name: AgenticSystem - name: SyntheticDataGeneration - name: Finetuning +- name: AgenticSystem +- name: Inference - name: Datasets -- name: RewardScoring - description: name: ShieldConfig - description: ' name: FinetuningJobLogStream -- description: - name: ChatCompletionRequest + name: BatchChatCompletionRequest +- description: + name: Dialog - description: 'Normal chat completion response. ' name: ChatCompletionResponse +- description: + name: BatchCompletionRequest +- description: 'Normal completion response. + + + ' + name: CompletionResponse +- description: + name: ChatCompletionRequest - description: 'Streamed chat completion response. The actual response is a series of such objects. @@ -1751,11 +1933,6 @@ tags: - description: name: CompletionRequest -- description: 'Normal completion response. - - - ' - name: CompletionResponse - description: 'streamed completion response. @@ -1828,6 +2005,8 @@ x-tagGroups: - AgenticSystemExecuteResponseStreamChunk - AgenticSystemTurn - Attachment + - BatchChatCompletionRequest + - BatchCompletionRequest - ChatCompletionRequest - ChatCompletionResponse - ChatCompletionResponseStreamChunk @@ -1836,6 +2015,7 @@ x-tagGroups: - CompletionResponseStreamChunk - CreateDatasetRequest - Dataset + - Dialog - FinetuningJobArtifactsResponse - FinetuningJobLogStream - FinetuningJobStatusResponse