From 7cade3acc390139fde1d839aa91aedf10a6f5821 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin@meta.com>
Date: Wed, 10 Jul 2024 23:33:57 -0700
Subject: [PATCH] Use Dialog in chat requests; fold batch endpoints into Inference

---
 source/api_definitions.py |  63 ++--
 source/openapi.html       | 621 +++++++++++++++++++++++++++-----------
 source/openapi.yaml       | 308 +++++++++++++++----
 3 files changed, 721 insertions(+), 271 deletions(-)

diff --git a/source/api_definitions.py b/source/api_definitions.py
index d147cae6d..b6283c83b 100644
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -80,15 +80,12 @@ class CompletionResponseStreamChunk:
 @json_schema_type
 @dataclass
 class ChatCompletionRequest:
-    message: Message
     model: InstructModel
-    message_history: List[Message] = None
+    dialog: Dialog
     sampling_params: SamplingParams = SamplingParams()
 
     # zero-shot tool definitions as input to the model
-    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
-        default_factory=list
-    )
+    available_tools: List[ToolDefinition] = field(default_factory=list)
 
     max_tokens: int = 0
     stream: bool = False
@@ -119,6 +116,30 @@ class ChatCompletionResponseStreamChunk:
     tool_call: Optional[ToolCall] = None
 
 
+@json_schema_type
+@dataclass
+class BatchCompletionRequest:  # request body for POST /batch_completion
+    model: PretrainedModel
+    content_batch: List[Content]
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+    max_tokens: int = 0
+    logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class BatchChatCompletionRequest:  # request body for POST /batch_chat_completion
+    model: InstructModel
+    batch_dialogs: List[Dialog]
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+
+    # zero-shot tool definitions as input to the model
+    available_tools: List[ToolDefinition] = field(default_factory=list)
+
+    max_tokens: int = 0
+    logprobs: bool = False
+
+
 class Inference(Protocol):
 
     def post_completion(
@@ -131,35 +152,6 @@ class Inference(Protocol):
         request: ChatCompletionRequest,
     ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
 
-
-@json_schema_type
-@dataclass
-class BatchCompletionRequest:
-    content_batch: List[Content]
-    model: PretrainedModel
-    sampling_params: SamplingParams = SamplingParams()
-    max_tokens: int = 0
-    logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class BatchChatCompletionRequest:
-    model: InstructModel
-    batch_messages: List[Dialog]
-    sampling_params: SamplingParams = SamplingParams()
-
-    # zero-shot tool definitions as input to the model
-    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
-        default_factory=list
-    )
-
-    max_tokens: int = 0
-    logprobs: bool = False
-
-
-class BatchInference(Protocol):
-    """Batch inference calls"""
     def post_batch_completion(
         self,
         request: BatchCompletionRequest,
@@ -302,8 +294,7 @@ class MemoryBanks(Protocol):
 
 @dataclass
 class KPromptGenerations:
-    prompt: Message
-    message_history: List[Message]
+    dialog: Dialog  # the prompt message together with its message history
     k_generations: List[Message]
 
 
diff --git a/source/openapi.html b/source/openapi.html
index 11e18c18b..b61378bad 100644
--- a/source/openapi.html
+++ b/source/openapi.html
@@ -386,6 +386,66 @@
                 ]
             }
         },
+        "/batch_chat_completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ChatCompletionResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/BatchChatCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/batch_completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/CompletionResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/BatchCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/chat_completion": {
             "post": {
                 "responses": {
@@ -1770,12 +1830,9 @@
                 ],
                 "title": "Stream of logs from a finetuning job."
             },
-            "ChatCompletionRequest": {
+            "BatchChatCompletionRequest": {
                 "type": "object",
                 "properties": {
-                    "message": {
-                        "$ref": "#/components/schemas/Message"
-                    },
                     "model": {
                         "type": "string",
                         "enum": [
@@ -1783,10 +1840,10 @@
                             "llama3_70b_chat"
                         ]
                     },
-                    "message_history": {
+                    "batch_dialogs": {
                         "type": "array",
                         "items": {
-                            "$ref": "#/components/schemas/Message"
+                            "$ref": "#/components/schemas/Dialog"
                         }
                     },
                     "sampling_params": {
@@ -1820,80 +1877,67 @@
                     "available_tools": {
                         "type": "array",
                         "items": {
-                            "oneOf": [
-                                {
-                                    "type": "string",
-                                    "enum": [
-                                        "web_search",
-                                        "math",
-                                        "image_gen",
-                                        "code_interpreter"
-                                    ]
-                                },
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "tool_name": {
-                                            "oneOf": [
-                                                {
-                                                    "type": "string",
-                                                    "enum": [
-                                                        "web_search",
-                                                        "math",
-                                                        "image_gen",
-                                                        "code_interpreter"
-                                                    ]
-                                                },
-                                                {
-                                                    "type": "string"
-                                                }
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string",
+                                            "enum": [
+                                                "web_search",
+                                                "math",
+                                                "image_gen",
+                                                "code_interpreter"
                                             ]
                                         },
-                                        "parameters": {
-                                            "type": "object",
-                                            "additionalProperties": {
-                                                "oneOf": [
-                                                    {
-                                                        "type": "null"
-                                                    },
-                                                    {
-                                                        "type": "boolean"
-                                                    },
-                                                    {
-                                                        "type": "number"
-                                                    },
-                                                    {
-                                                        "type": "string"
-                                                    },
-                                                    {
-                                                        "type": "array"
-                                                    },
-                                                    {
-                                                        "type": "object"
-                                                    }
-                                                ]
-                                            }
-                                        },
-                                        "input_shields": {
-                                            "type": "array",
-                                            "items": {
-                                                "$ref": "#/components/schemas/ShieldConfig"
-                                            }
-                                        },
-                                        "output_shields": {
-                                            "type": "array",
-                                            "items": {
-                                                "$ref": "#/components/schemas/ShieldConfig"
-                                            }
+                                        {
+                                            "type": "string"
                                         }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "tool_name",
-                                        "input_shields",
-                                        "output_shields"
                                     ]
+                                },
+                                "parameters": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                },
+                                "input_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                },
+                                "output_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
                                 }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "input_shields",
+                                "output_shields"
                             ]
                         }
                     },
@@ -1901,10 +1945,6 @@
                         "type": "integer",
                         "default": 0
                     },
-                    "stream": {
-                        "type": "boolean",
-                        "default": false
-                    },
                     "logprobs": {
                         "type": "boolean",
                         "default": false
@@ -1912,16 +1952,33 @@
                 },
                 "additionalProperties": false,
                 "required": [
-                    "message",
                     "model",
-                    "message_history",
+                    "batch_dialogs",
                     "sampling_params",
                     "available_tools",
                     "max_tokens",
-                    "stream",
                     "logprobs"
                 ]
             },
+            "Dialog": {
+                "type": "object",
+                "properties": {
+                    "message": {
+                        "$ref": "#/components/schemas/Message"
+                    },
+                    "message_history": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/Message"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "message",
+                    "message_history"
+                ]
+            },
             "ChatCompletionResponse": {
                 "type": "object",
                 "properties": {
@@ -2032,6 +2089,287 @@
                 ],
                 "title": "Normal chat completion response."
             },
+            "BatchCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b",
+                            "llama3_70b"
+                        ]
+                    },
+                    "content_batch": {
+                        "type": "array",
+                        "items": {
+                            "oneOf": [
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/Attachment"
+                                },
+                                {
+                                    "type": "array",
+                                    "items": {
+                                        "oneOf": [
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "$ref": "#/components/schemas/Attachment"
+                                            }
+                                        ]
+                                    }
+                                }
+                            ]
+                        }
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "content_batch",
+                    "sampling_params",
+                    "max_tokens",
+                    "logprobs"
+                ]
+            },
+            "CompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "logprobs": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content"
+                ],
+                "title": "Normal completion response."
+            },
+            "ChatCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b_chat",
+                            "llama3_70b_chat"
+                        ]
+                    },
+                    "dialog": {
+                        "$ref": "#/components/schemas/Dialog"
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "available_tools": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string",
+                                            "enum": [
+                                                "web_search",
+                                                "math",
+                                                "image_gen",
+                                                "code_interpreter"
+                                            ]
+                                        },
+                                        {
+                                            "type": "string"
+                                        }
+                                    ]
+                                },
+                                "parameters": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                },
+                                "input_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                },
+                                "output_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "input_shields",
+                                "output_shields"
+                            ]
+                        }
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "default": false
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "dialog",
+                    "sampling_params",
+                    "available_tools",
+                    "max_tokens",
+                    "stream",
+                    "logprobs"
+                ]
+            },
             "ChatCompletionResponseStreamChunk": {
                 "type": "object",
                 "properties": {
@@ -2177,73 +2515,6 @@
                     "logprobs"
                 ]
             },
-            "CompletionResponse": {
-                "type": "object",
-                "properties": {
-                    "content": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Attachment"
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "oneOf": [
-                                        {
-                                            "type": "string"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/Attachment"
-                                        }
-                                    ]
-                                }
-                            }
-                        ]
-                    },
-                    "stop_reason": {
-                        "type": "string",
-                        "enum": [
-                            "not_stopped",
-                            "finished_ok",
-                            "max_tokens"
-                        ],
-                        "title": "Stop reasons are used to indicate why the model stopped generating text."
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "content"
-                ],
-                "title": "Normal completion response."
-            },
             "CompletionResponseStreamChunk": {
                 "type": "object",
                 "properties": {
@@ -2409,14 +2680,8 @@
                         "items": {
                             "type": "object",
                             "properties": {
-                                "prompt": {
-                                    "$ref": "#/components/schemas/Message"
-                                },
-                                "message_history": {
-                                    "type": "array",
-                                    "items": {
-                                        "$ref": "#/components/schemas/Message"
-                                    }
+                                "dialog": {
+                                    "$ref": "#/components/schemas/Dialog"
                                 },
                                 "k_generations": {
                                     "type": "array",
@@ -2427,8 +2692,7 @@
                             },
                             "additionalProperties": false,
                             "required": [
-                                "prompt",
-                                "message_history",
+                                "dialog",
                                 "k_generations"
                             ]
                         }
@@ -2738,14 +3002,11 @@
     ],
     "tags": [
         {
-            "name": "Inference"
+            "name": "RewardScoring"
         },
         {
             "name": "MemoryBanks"
         },
-        {
-            "name": "AgenticSystem"
-        },
         {
             "name": "SyntheticDataGeneration"
         },
@@ -2753,10 +3014,13 @@
             "name": "Finetuning"
         },
         {
-            "name": "Datasets"
+            "name": "AgenticSystem"
         },
         {
-            "name": "RewardScoring"
+            "name": "Inference"
+        },
+        {
+            "name": "Datasets"
         },
         {
             "name": "ShieldConfig",
@@ -2823,13 +3087,29 @@
             "description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
         },
         {
-            "name": "ChatCompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
+            "name": "BatchChatCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
+        },
+        {
+            "name": "Dialog",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Dialog\" />"
         },
         {
             "name": "ChatCompletionResponse",
             "description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
         },
+        {
+            "name": "BatchCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchCompletionRequest\" />"
+        },
+        {
+            "name": "CompletionResponse",
+            "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
+        },
+        {
+            "name": "ChatCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
+        },
         {
             "name": "ChatCompletionResponseStreamChunk",
             "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
@@ -2838,10 +3118,6 @@
             "name": "CompletionRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionRequest\" />"
         },
-        {
-            "name": "CompletionResponse",
-            "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
-        },
         {
             "name": "CompletionResponseStreamChunk",
             "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
@@ -2910,6 +3186,8 @@
                 "AgenticSystemExecuteResponseStreamChunk",
                 "AgenticSystemTurn",
                 "Attachment",
+                "BatchChatCompletionRequest",
+                "BatchCompletionRequest",
                 "ChatCompletionRequest",
                 "ChatCompletionResponse",
                 "ChatCompletionResponseStreamChunk",
@@ -2918,6 +3196,7 @@
                 "CompletionResponseStreamChunk",
                 "CreateDatasetRequest",
                 "Dataset",
+                "Dialog",
                 "FinetuningJobArtifactsResponse",
                 "FinetuningJobLogStream",
                 "FinetuningJobStatusResponse",
diff --git a/source/openapi.yaml b/source/openapi.yaml
index 8d0363936..da53c4a56 100644
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@@ -433,52 +433,49 @@ components:
       title: Attachments are used to refer to external resources, such as images,
         videos, audio, etc.
       type: object
-    ChatCompletionRequest:
+    BatchChatCompletionRequest:
       additionalProperties: false
       properties:
         available_tools:
           items:
-            oneOf:
-            - enum:
-              - web_search
-              - math
-              - image_gen
-              - code_interpreter
-              type: string
-            - additionalProperties: false
-              properties:
-                input_shields:
-                  items:
-                    $ref: '#/components/schemas/ShieldConfig'
-                  type: array
-                output_shields:
-                  items:
-                    $ref: '#/components/schemas/ShieldConfig'
-                  type: array
-                parameters:
-                  additionalProperties:
-                    oneOf:
-                    - type: 'null'
-                    - type: boolean
-                    - type: number
-                    - type: string
-                    - type: array
-                    - type: object
-                  type: object
-                tool_name:
+            additionalProperties: false
+            properties:
+              input_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              output_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              parameters:
+                additionalProperties:
                   oneOf:
-                  - enum:
-                    - web_search
-                    - math
-                    - image_gen
-                    - code_interpreter
-                    type: string
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
                   - type: string
-              required:
-              - tool_name
-              - input_shields
-              - output_shields
-              type: object
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                oneOf:
+                - enum:
+                  - web_search
+                  - math
+                  - image_gen
+                  - code_interpreter
+                  type: string
+                - type: string
+            required:
+            - tool_name
+            - input_shields
+            - output_shields
+            type: object
+          type: array
+        batch_dialogs:
+          items:
+            $ref: '#/components/schemas/Dialog'
           type: array
         logprobs:
           default: false
@@ -486,12 +483,141 @@ components:
         max_tokens:
           default: 0
           type: integer
-        message:
-          $ref: '#/components/schemas/Message'
-        message_history:
+        model:
+          enum:
+          - llama3_8b_chat
+          - llama3_70b_chat
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+      required:
+      - model
+      - batch_dialogs
+      - sampling_params
+      - available_tools
+      - max_tokens
+      - logprobs
+      type: object
+    BatchCompletionRequest:
+      additionalProperties: false
+      properties:
+        content_batch:
           items:
-            $ref: '#/components/schemas/Message'
+            oneOf:
+            - type: string
+            - $ref: '#/components/schemas/Attachment'
+            - items:
+                oneOf:
+                - type: string
+                - $ref: '#/components/schemas/Attachment'
+              type: array
           type: array
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
+        model:
+          enum:
+          - llama3_8b
+          - llama3_70b
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+      required:
+      - model
+      - content_batch
+      - sampling_params
+      - max_tokens
+      - logprobs
+      type: object
+    ChatCompletionRequest:
+      additionalProperties: false
+      properties:
+        available_tools:
+          items:
+            additionalProperties: false
+            properties:
+              input_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              output_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              parameters:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                oneOf:
+                - enum:
+                  - web_search
+                  - math
+                  - image_gen
+                  - code_interpreter
+                  type: string
+                - type: string
+            required:
+            - tool_name
+            - input_shields
+            - output_shields
+            type: object
+          type: array
+        dialog:
+          $ref: '#/components/schemas/Dialog'
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
         model:
           enum:
           - llama3_8b_chat
@@ -522,9 +648,8 @@ components:
           default: false
           type: boolean
       required:
-      - message
       - model
-      - message_history
+      - dialog
       - sampling_params
       - available_tools
       - max_tokens
@@ -785,6 +910,19 @@ components:
       - metadata
       title: Dataset to be used for training or evaluating language models.
       type: object
+    Dialog:
+      additionalProperties: false
+      properties:
+        message:
+          $ref: '#/components/schemas/Message'
+        message_history:
+          items:
+            $ref: '#/components/schemas/Message'
+          type: array
+      required:
+      - message
+      - message_history
+      type: object
     FinetuningJobArtifactsResponse:
       additionalProperties: false
       properties:
@@ -1132,19 +1270,14 @@ components:
           items:
             additionalProperties: false
             properties:
+              dialog:
+                $ref: '#/components/schemas/Dialog'
               k_generations:
                 items:
                   $ref: '#/components/schemas/Message'
                 type: array
-              message_history:
-                items:
-                  $ref: '#/components/schemas/Message'
-                type: array
-              prompt:
-                $ref: '#/components/schemas/Message'
             required:
-            - prompt
-            - message_history
+            - dialog
             - k_generations
             type: object
           type: array
@@ -1327,6 +1460,42 @@ paths:
             agent execution response.
       tags:
       - AgenticSystem
+  /batch_chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionResponse'
+          description: OK
+      tags:
+      - Inference
+  /batch_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/CompletionResponse'
+          description: OK
+      tags:
+      - Inference
   /chat_completion:
     post:
       parameters: []
@@ -1659,13 +1828,13 @@ security:
 servers:
 - url: http://llama.meta.com
 tags:
-- name: Inference
+- name: RewardScoring
 - name: MemoryBanks
-- name: AgenticSystem
 - name: SyntheticDataGeneration
 - name: Finetuning
+- name: AgenticSystem
+- name: Inference
 - name: Datasets
-- name: RewardScoring
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
   name: ShieldConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@@ -1733,14 +1902,27 @@ tags:
 
     <SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
   name: FinetuningJobLogStream
-- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
+- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
     />
-  name: ChatCompletionRequest
+  name: BatchChatCompletionRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/Dialog" />
+  name: Dialog
 - description: 'Normal chat completion response.
 
 
     <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
   name: ChatCompletionResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
+    />
+  name: BatchCompletionRequest
+- description: 'Normal completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
+  name: CompletionResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
+    />
+  name: ChatCompletionRequest
 - description: 'Streamed chat completion response. The actual response is a series
     of such objects.
 
@@ -1751,11 +1933,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
     />
   name: CompletionRequest
-- description: 'Normal completion response.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
-  name: CompletionResponse
 - description: 'streamed completion response.
 
 
@@ -1828,6 +2005,8 @@ x-tagGroups:
   - AgenticSystemExecuteResponseStreamChunk
   - AgenticSystemTurn
   - Attachment
+  - BatchChatCompletionRequest
+  - BatchCompletionRequest
   - ChatCompletionRequest
   - ChatCompletionResponse
   - ChatCompletionResponseStreamChunk
@@ -1836,6 +2015,7 @@ x-tagGroups:
   - CompletionResponseStreamChunk
   - CreateDatasetRequest
   - Dataset
+  - Dialog
   - FinetuningJobArtifactsResponse
   - FinetuningJobLogStream
   - FinetuningJobStatusResponse