This commit is contained in:
Ashwin Bharambe 2025-04-11 16:15:59 -07:00
parent 0cfb2e2473
commit 73d927850e
4 changed files with 43 additions and 316 deletions

View file

@ -128,49 +128,6 @@
}
}
},
"/v1/batch-inference/chat-completion-inline": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchChatCompletionResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"BatchInference (Coming Soon)"
],
"description": "",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchChatCompletionInlineRequest"
}
}
},
"required": true
}
}
},
"/v1/inference/batch-completion": {
"post": {
"responses": {
@ -214,49 +171,6 @@
}
}
},
"/v1/batch-inference/completion-inline": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchCompletionResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"BatchInference (Coming Soon)"
],
"description": "",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchCompletionInlineRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/job/cancel": {
"post": {
"responses": {
@ -325,7 +239,7 @@
}
},
"tags": [
"Inference"
"BatchInference (Coming Soon)"
],
"description": "Generate a chat completion for the given messages using the specified model.",
"parameters": [],
@ -373,7 +287,7 @@
}
},
"tags": [
"Inference"
"BatchInference (Coming Soon)"
],
"description": "Generate a completion for the given content using the specified model.",
"parameters": [],
@ -4821,56 +4735,6 @@
"title": "TokenLogProbs",
"description": "Log probabilities for generated tokens."
},
"BatchChatCompletionInlineRequest": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"messages_batch": {
"type": "array",
"items": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
}
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"title": "LogProbConfig"
}
},
"additionalProperties": false,
"required": [
"model",
"messages_batch"
],
"title": "BatchChatCompletionInlineRequest"
},
"BatchCompletionRequest": {
"type": "object",
"properties": {
@ -4963,44 +4827,6 @@
"title": "CompletionResponse",
"description": "Response from a completion request."
},
"BatchCompletionInlineRequest": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"content_batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContent"
}
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"title": "LogProbConfig"
}
},
"additionalProperties": false,
"required": [
"model",
"content_batch"
],
"title": "BatchCompletionInlineRequest"
},
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
@ -11331,7 +11157,9 @@
"x-displayName": "Agents API for creating and interacting with agentic systems."
},
{
"name": "BatchInference (Coming Soon)"
"name": "BatchInference (Coming Soon)",
"description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\nincluding (post-training, evals, etc).",
"x-displayName": "Batch inference API for generating completions and chat completions."
},
{
"name": "Benchmarks"

View file

@ -69,35 +69,6 @@ paths:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
/v1/batch-inference/chat-completion-inline:
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
description: ''
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionInlineRequest'
required: true
/v1/inference/batch-completion:
post:
responses:
@ -127,35 +98,6 @@ paths:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
/v1/batch-inference/completion-inline:
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
description: ''
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionInlineRequest'
required: true
/v1/post-training/job/cancel:
post:
responses:
@ -206,7 +148,7 @@ paths:
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
- BatchInference (Coming Soon)
description: >-
Generate a chat completion for the given messages using the specified model.
parameters: []
@ -241,7 +183,7 @@ paths:
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
- BatchInference (Coming Soon)
description: >-
Generate a completion for the given content using the specified model.
parameters: []
@ -3346,42 +3288,6 @@ components:
- logprobs_by_token
title: TokenLogProbs
description: Log probabilities for generated tokens.
BatchChatCompletionInlineRequest:
type: object
properties:
model:
type: string
messages_batch:
type: array
items:
type: array
items:
$ref: '#/components/schemas/Message'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
tools:
type: array
items:
$ref: '#/components/schemas/ToolDefinition'
tool_config:
$ref: '#/components/schemas/ToolConfig'
response_format:
$ref: '#/components/schemas/ResponseFormat'
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
title: LogProbConfig
additionalProperties: false
required:
- model
- messages_batch
title: BatchChatCompletionInlineRequest
BatchCompletionRequest:
type: object
properties:
@ -3450,34 +3356,6 @@ components:
- stop_reason
title: CompletionResponse
description: Response from a completion request.
BatchCompletionInlineRequest:
type: object
properties:
model:
type: string
content_batch:
type: array
items:
$ref: '#/components/schemas/InterleavedContent'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
response_format:
$ref: '#/components/schemas/ResponseFormat'
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
title: LogProbConfig
additionalProperties: false
required:
- model
- content_batch
title: BatchCompletionInlineRequest
CancelTrainingJobRequest:
type: object
properties:
@ -7737,6 +7615,17 @@ tags:
x-displayName: >-
Agents API for creating and interacting with agentic systems.
- name: BatchInference (Coming Soon)
description: >-
This is an asynchronous API. If the request is successful, the response will
be a job which can be polled for completion.
NOTE: This API is not yet implemented and is subject to change in concert with
other asynchronous APIs
including (post-training, evals, etc).
x-displayName: >-
Batch inference API for generating completions and chat completions.
- name: Benchmarks
- name: DatasetIO
- name: Datasets