fixes

2025-12-03 09:53:45 +00:00 · 2024-07-10 23:33:57 -07:00 · 2024-07-10 23:33:57 -07:00 · 7cade3acc3
commit 7cade3acc3
parent ee86f2c75f
3 changed files with 721 additions and 271 deletions
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -80,15 +80,12 @@ class CompletionResponseStreamChunk:
@json_schema_type
@dataclass
 class ChatCompletionRequest:
-    message: Message
    model: InstructModel
-    message_history: List[Message] = None
+    dialog: Dialog
    sampling_params: SamplingParams = SamplingParams()

    # zero-shot tool definitions as input to the model
-    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
-        default_factory=list
-    )
+    available_tools: List[ToolDefinition] = field(default_factory=list)

    max_tokens: int = 0
    stream: bool = False
@@ -119,6 +116,30 @@ class ChatCompletionResponseStreamChunk:
    tool_call: Optional[ToolCall] = None


+@json_schema_type
+@dataclass
+class BatchCompletionRequest:
+    model: PretrainedModel
+    content_batch: List[Content]
+    sampling_params: SamplingParams = SamplingParams()
+    max_tokens: int = 0
+    logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class BatchChatCompletionRequest:
+    model: InstructModel
+    batch_dialogs: List[Dialog]
+    sampling_params: SamplingParams = SamplingParams()
+
+    # zero-shot tool definitions as input to the model
+    available_tools: List[ToolDefinition] = field(default_factory=list)
+
+    max_tokens: int = 0
+    logprobs: bool = False
+
+
 class Inference(Protocol):

    def post_completion(
@@ -131,35 +152,6 @@ class Inference(Protocol):
        request: ChatCompletionRequest,
    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...

-
-@json_schema_type
-@dataclass
-class BatchCompletionRequest:
-    content_batch: List[Content]
-    model: PretrainedModel
-    sampling_params: SamplingParams = SamplingParams()
-    max_tokens: int = 0
-    logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class BatchChatCompletionRequest:
-    model: InstructModel
-    batch_messages: List[Dialog]
-    sampling_params: SamplingParams = SamplingParams()
-
-    # zero-shot tool definitions as input to the model
-    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
-        default_factory=list
-    )
-
-    max_tokens: int = 0
-    logprobs: bool = False
-
-
-class BatchInference(Protocol):
-    """Batch inference calls"""
    def post_batch_completion(
        self,
        request: BatchCompletionRequest,
@@ -302,8 +294,7 @@ class MemoryBanks(Protocol):

@dataclass
 class KPromptGenerations:
-    prompt: Message
-    message_history: List[Message]
+    dialog: Dialog
    k_generations: List[Message]


--- a/source/openapi.html
+++ b/source/openapi.html
@@ -386,6 +386,66 @@
                ]
            }
        },
+        "/batch_chat_completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ChatCompletionResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/BatchChatCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/batch_completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/CompletionResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/BatchCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/chat_completion": {
            "post": {
                "responses": {
@@ -1770,12 +1830,9 @@
                ],
                "title": "Stream of logs from a finetuning job."
            },
-            "ChatCompletionRequest": {
+            "BatchChatCompletionRequest": {
                "type": "object",
                "properties": {
-                    "message": {
-                        "$ref": "#/components/schemas/Message"
-                    },
                    "model": {
                        "type": "string",
                        "enum": [
@@ -1783,10 +1840,10 @@
                            "llama3_70b_chat"
                        ]
                    },
-                    "message_history": {
+                    "batch_dialogs": {
                        "type": "array",
                        "items": {
-                            "$ref": "#/components/schemas/Message"
+                            "$ref": "#/components/schemas/Dialog"
                        }
                    },
                    "sampling_params": {
@@ -1820,80 +1877,67 @@
                    "available_tools": {
                        "type": "array",
                        "items": {
-                            "oneOf": [
-                                {
-                                    "type": "string",
-                                    "enum": [
-                                        "web_search",
-                                        "math",
-                                        "image_gen",
-                                        "code_interpreter"
-                                    ]
-                                },
-                                {
-                                    "type": "object",
-                                    "properties": {
-                                        "tool_name": {
-                                            "oneOf": [
-                                                {
-                                                    "type": "string",
-                                                    "enum": [
-                                                        "web_search",
-                                                        "math",
-                                                        "image_gen",
-                                                        "code_interpreter"
-                                                    ]
-                                                },
-                                                {
-                                                    "type": "string"
-                                                }
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string",
+                                            "enum": [
+                                                "web_search",
+                                                "math",
+                                                "image_gen",
+                                                "code_interpreter"
                                            ]
                                        },
-                                        "parameters": {
-                                            "type": "object",
-                                            "additionalProperties": {
-                                                "oneOf": [
-                                                    {
-                                                        "type": "null"
-                                                    },
-                                                    {
-                                                        "type": "boolean"
-                                                    },
-                                                    {
-                                                        "type": "number"
-                                                    },
-                                                    {
-                                                        "type": "string"
-                                                    },
-                                                    {
-                                                        "type": "array"
-                                                    },
-                                                    {
-                                                        "type": "object"
-                                                    }
-                                                ]
-                                            }
-                                        },
-                                        "input_shields": {
-                                            "type": "array",
-                                            "items": {
-                                                "$ref": "#/components/schemas/ShieldConfig"
-                                            }
-                                        },
-                                        "output_shields": {
-                                            "type": "array",
-                                            "items": {
-                                                "$ref": "#/components/schemas/ShieldConfig"
-                                            }
+                                        {
+                                            "type": "string"
                                        }
-                                    },
-                                    "additionalProperties": false,
-                                    "required": [
-                                        "tool_name",
-                                        "input_shields",
-                                        "output_shields"
                                    ]
+                                },
+                                "parameters": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                },
+                                "input_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                },
+                                "output_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "input_shields",
+                                "output_shields"
                            ]
                        }
                    },
@@ -1901,10 +1945,6 @@
                        "type": "integer",
                        "default": 0
                    },
-                    "stream": {
-                        "type": "boolean",
-                        "default": false
-                    },
                    "logprobs": {
                        "type": "boolean",
                        "default": false
@@ -1912,16 +1952,33 @@
                },
                "additionalProperties": false,
                "required": [
-                    "message",
                    "model",
-                    "message_history",
+                    "batch_dialogs",
                    "sampling_params",
                    "available_tools",
                    "max_tokens",
-                    "stream",
                    "logprobs"
                ]
            },
+            "Dialog": {
+                "type": "object",
+                "properties": {
+                    "message": {
+                        "$ref": "#/components/schemas/Message"
+                    },
+                    "message_history": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/Message"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "message",
+                    "message_history"
+                ]
+            },
            "ChatCompletionResponse": {
                "type": "object",
                "properties": {
@@ -2032,6 +2089,287 @@
                ],
                "title": "Normal chat completion response."
            },
+            "BatchCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b",
+                            "llama3_70b"
+                        ]
+                    },
+                    "content_batch": {
+                        "type": "array",
+                        "items": {
+                            "oneOf": [
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/Attachment"
+                                },
+                                {
+                                    "type": "array",
+                                    "items": {
+                                        "oneOf": [
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "$ref": "#/components/schemas/Attachment"
+                                            }
+                                        ]
+                                    }
+                                }
+                            ]
+                        }
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "content_batch",
+                    "sampling_params",
+                    "max_tokens",
+                    "logprobs"
+                ]
+            },
+            "CompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "logprobs": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content"
+                ],
+                "title": "Normal completion response."
+            },
+            "ChatCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b_chat",
+                            "llama3_70b_chat"
+                        ]
+                    },
+                    "dialog": {
+                        "$ref": "#/components/schemas/Dialog"
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "available_tools": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string",
+                                            "enum": [
+                                                "web_search",
+                                                "math",
+                                                "image_gen",
+                                                "code_interpreter"
+                                            ]
+                                        },
+                                        {
+                                            "type": "string"
+                                        }
+                                    ]
+                                },
+                                "parameters": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                },
+                                "input_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                },
+                                "output_shields": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/ShieldConfig"
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "input_shields",
+                                "output_shields"
+                            ]
+                        }
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "default": false
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "dialog",
+                    "sampling_params",
+                    "available_tools",
+                    "max_tokens",
+                    "stream",
+                    "logprobs"
+                ]
+            },
            "ChatCompletionResponseStreamChunk": {
                "type": "object",
                "properties": {
@@ -2177,73 +2515,6 @@
                    "logprobs"
                ]
            },
-            "CompletionResponse": {
-                "type": "object",
-                "properties": {
-                    "content": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Attachment"
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "oneOf": [
-                                        {
-                                            "type": "string"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/Attachment"
-                                        }
-                                    ]
-                                }
-                            }
-                        ]
-                    },
-                    "stop_reason": {
-                        "type": "string",
-                        "enum": [
-                            "not_stopped",
-                            "finished_ok",
-                            "max_tokens"
-                        ],
-                        "title": "Stop reasons are used to indicate why the model stopped generating text."
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "content"
-                ],
-                "title": "Normal completion response."
-            },
            "CompletionResponseStreamChunk": {
                "type": "object",
                "properties": {
@@ -2409,14 +2680,8 @@
                        "items": {
                            "type": "object",
                            "properties": {
-                                "prompt": {
-                                    "$ref": "#/components/schemas/Message"
-                                },
-                                "message_history": {
-                                    "type": "array",
-                                    "items": {
-                                        "$ref": "#/components/schemas/Message"
-                                    }
+                                "dialog": {
+                                    "$ref": "#/components/schemas/Dialog"
                                },
                                "k_generations": {
                                    "type": "array",
@@ -2427,8 +2692,7 @@
                            },
                            "additionalProperties": false,
                            "required": [
-                                "prompt",
-                                "message_history",
+                                "dialog",
                                "k_generations"
                            ]
                        }
@@ -2738,14 +3002,11 @@
    ],
    "tags": [
        {
-            "name": "Inference"
+            "name": "RewardScoring"
        },
        {
            "name": "MemoryBanks"
        },
-        {
-            "name": "AgenticSystem"
-        },
        {
            "name": "SyntheticDataGeneration"
        },
@@ -2753,10 +3014,13 @@
            "name": "Finetuning"
        },
        {
-            "name": "Datasets"
+            "name": "AgenticSystem"
        },
        {
-            "name": "RewardScoring"
+            "name": "Inference"
+        },
+        {
+            "name": "Datasets"
        },
        {
            "name": "ShieldConfig",
@ -2823,13 +3087,29 @@
            "description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
        },
        {
-            "name": "ChatCompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
+            "name": "BatchChatCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
+        },
+        {
+            "name": "Dialog",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Dialog\" />"
        },
        {
            "name": "ChatCompletionResponse",
            "description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
        },
+        {
+            "name": "BatchCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchCompletionRequest\" />"
+        },
+        {
+            "name": "CompletionResponse",
+            "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
+        },
+        {
+            "name": "ChatCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
+        },
        {
            "name": "ChatCompletionResponseStreamChunk",
            "description": "Streamed chat completion response. The actual response is a series of such objects.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
@ -2838,10 +3118,6 @@
            "name": "CompletionRequest",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionRequest\" />"
        },
-        {
-            "name": "CompletionResponse",
-            "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
-        },
        {
            "name": "CompletionResponseStreamChunk",
            "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
@ -2910,6 +3186,8 @@
                "AgenticSystemExecuteResponseStreamChunk",
                "AgenticSystemTurn",
                "Attachment",
+                "BatchChatCompletionRequest",
+                "BatchCompletionRequest",
                "ChatCompletionRequest",
                "ChatCompletionResponse",
                "ChatCompletionResponseStreamChunk",
@ -2918,6 +3196,7 @@
                "CompletionResponseStreamChunk",
                "CreateDatasetRequest",
                "Dataset",
+                "Dialog",
                "FinetuningJobArtifactsResponse",
                "FinetuningJobLogStream",
                "FinetuningJobStatusResponse",
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@ -433,52 +433,49 @@ components:
      title: Attachments are used to refer to external resources, such as images,
        videos, audio, etc.
      type: object
-    ChatCompletionRequest:
+    BatchChatCompletionRequest:
      additionalProperties: false
      properties:
        available_tools:
          items:
-            oneOf:
-            - enum:
-              - web_search
-              - math
-              - image_gen
-              - code_interpreter
-              type: string
-            - additionalProperties: false
-              properties:
-                input_shields:
-                  items:
-                    $ref: '#/components/schemas/ShieldConfig'
-                  type: array
-                output_shields:
-                  items:
-                    $ref: '#/components/schemas/ShieldConfig'
-                  type: array
-                parameters:
-                  additionalProperties:
-                    oneOf:
-                    - type: 'null'
-                    - type: boolean
-                    - type: number
-                    - type: string
-                    - type: array
-                    - type: object
-                  type: object
-                tool_name:
+            additionalProperties: false
+            properties:
+              input_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              output_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              parameters:
+                additionalProperties:
                  oneOf:
-                  - enum:
-                    - web_search
-                    - math
-                    - image_gen
-                    - code_interpreter
-                    type: string
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
                  - type: string
-              required:
-              - tool_name
-              - input_shields
-              - output_shields
-              type: object
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                oneOf:
+                - enum:
+                  - web_search
+                  - math
+                  - image_gen
+                  - code_interpreter
+                  type: string
+                - type: string
+            required:
+            - tool_name
+            - input_shields
+            - output_shields
+            type: object
+          type: array
+        batch_dialogs:
+          items:
+            $ref: '#/components/schemas/Dialog'
          type: array
        logprobs:
          default: false
@ -486,12 +483,141 @@ components:
        max_tokens:
          default: 0
          type: integer
-        message:
-          $ref: '#/components/schemas/Message'
-        message_history:
+        model:
+          enum:
+          - llama3_8b_chat
+          - llama3_70b_chat
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+      required:
+      - model
+      - batch_dialogs
+      - sampling_params
+      - available_tools
+      - max_tokens
+      - logprobs
+      type: object
+    BatchCompletionRequest:
+      additionalProperties: false
+      properties:
+        content_batch:
          items:
-            $ref: '#/components/schemas/Message'
+            oneOf:
+            - type: string
+            - $ref: '#/components/schemas/Attachment'
+            - items:
+                oneOf:
+                - type: string
+                - $ref: '#/components/schemas/Attachment'
+              type: array
          type: array
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
+        model:
+          enum:
+          - llama3_8b
+          - llama3_70b
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+      required:
+      - model
+      - content_batch
+      - sampling_params
+      - max_tokens
+      - logprobs
+      type: object
+    ChatCompletionRequest:
+      additionalProperties: false
+      properties:
+        available_tools:
+          items:
+            additionalProperties: false
+            properties:
+              input_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              output_shields:
+                items:
+                  $ref: '#/components/schemas/ShieldConfig'
+                type: array
+              parameters:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                oneOf:
+                - enum:
+                  - web_search
+                  - math
+                  - image_gen
+                  - code_interpreter
+                  type: string
+                - type: string
+            required:
+            - tool_name
+            - input_shields
+            - output_shields
+            type: object
+          type: array
+        dialog:
+          $ref: '#/components/schemas/Dialog'
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
        model:
          enum:
          - llama3_8b_chat
@ -522,9 +648,8 @@ components:
          default: false
          type: boolean
      required:
-      - message
      - model
-      - message_history
+      - dialog
      - sampling_params
      - available_tools
      - max_tokens
@ -785,6 +910,19 @@ components:
      - metadata
      title: Dataset to be used for training or evaluating language models.
      type: object
+    Dialog:
+      additionalProperties: false
+      properties:
+        message:
+          $ref: '#/components/schemas/Message'
+        message_history:
+          items:
+            $ref: '#/components/schemas/Message'
+          type: array
+      required:
+      - message
+      - message_history
+      type: object
    FinetuningJobArtifactsResponse:
      additionalProperties: false
      properties:
@ -1132,19 +1270,14 @@ components:
          items:
            additionalProperties: false
            properties:
+              dialog:
+                $ref: '#/components/schemas/Dialog'
              k_generations:
                items:
                  $ref: '#/components/schemas/Message'
                type: array
-              message_history:
-                items:
-                  $ref: '#/components/schemas/Message'
-                type: array
-              prompt:
-                $ref: '#/components/schemas/Message'
            required:
-            - prompt
-            - message_history
+            - dialog
            - k_generations
            type: object
          type: array
@ -1327,6 +1460,42 @@ paths:
            agent execution response.
      tags:
      - AgenticSystem
+  /batch_chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionResponse'
+          description: OK
+      tags:
+      - Inference
+  /batch_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/CompletionResponse'
+          description: OK
+      tags:
+      - Inference
  /chat_completion:
    post:
      parameters: []
@ -1659,13 +1828,13 @@ security:
 servers:
 - url: http://llama.meta.com
 tags:
- name: Inference
+- name: RewardScoring
 - name: MemoryBanks
- name: AgenticSystem
 - name: SyntheticDataGeneration
 - name: Finetuning
+- name: AgenticSystem
+- name: Inference
 - name: Datasets
- name: RewardScoring
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
  name: ShieldConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1733,14 +1902,27 @@ tags:

    <SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
  name: FinetuningJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
+- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
    />
-  name: ChatCompletionRequest
+  name: BatchChatCompletionRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/Dialog" />
+  name: Dialog
 - description: 'Normal chat completion response.


    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
  name: ChatCompletionResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
+    />
+  name: BatchCompletionRequest
+- description: 'Normal completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
+  name: CompletionResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
+    />
+  name: ChatCompletionRequest
 - description: 'Streamed chat completion response. The actual response is a series
    of such objects.

@ -1751,11 +1933,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
    />
  name: CompletionRequest
- description: 'Normal completion response.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
-  name: CompletionResponse
 - description: 'streamed completion response.


@ -1828,6 +2005,8 @@ x-tagGroups:
  - AgenticSystemExecuteResponseStreamChunk
  - AgenticSystemTurn
  - Attachment
+  - BatchChatCompletionRequest
+  - BatchCompletionRequest
  - ChatCompletionRequest
  - ChatCompletionResponse
  - ChatCompletionResponseStreamChunk
@ -1836,6 +2015,7 @@ x-tagGroups:
  - CompletionResponseStreamChunk
  - CreateDatasetRequest
  - Dataset
+  - Dialog
  - FinetuningJobArtifactsResponse
  - FinetuningJobLogStream
  - FinetuningJobStatusResponse