diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index ce9daa26f..f531542c5 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -15,6 +15,141 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: List all batches for the current user. + description: List all batches for the current user. + parameters: + - name: after + in: query + description: >- + A cursor for pagination; returns batches after this batch ID. + required: false + schema: + type: string + - name: limit + in: query + description: >- + Number of batches to return (default 20, max 100). + required: true + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Create a new batch for processing multiple API requests. + description: >- + Create a new batch for processing multiple API requests. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + required: true + deprecated: false + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Retrieve information about a specific batch. + description: >- + Retrieve information about a specific batch. + parameters: + - name: batch_id + in: path + description: The ID of the batch to retrieve. + required: true + schema: + type: string + deprecated: false + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel a batch that is in progress. + description: Cancel a batch that is in progress. + parameters: + - name: batch_id + in: path + description: The ID of the batch to cancel. + required: true + schema: + type: string + deprecated: false /v1/chat/completions: get: responses: @@ -4212,6 +4347,331 @@ components: title: Error description: >- Error response from the API. Roughly follows RFC 7807. 
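The four /v1/batches routes added above are ordinary REST endpoints, so they can be exercised with any HTTP client before reaching for an SDK. A minimal sketch, assuming a Llama Stack server at http://localhost:8321 (the address is a placeholder) and the stock requests library:

    # Exercise the Batches routes defined above with plain HTTP.
    import requests

    BASE_URL = "http://localhost:8321"  # assumed server address

    # GET /v1/batches -- note that this spec declares `limit` as required.
    listing = requests.get(f"{BASE_URL}/v1/batches", params={"limit": 20}).json()

    for item in listing["data"]:
        # GET /v1/batches/{batch_id} returns the full Batch object.
        batch = requests.get(f"{BASE_URL}/v1/batches/{item['id']}").json()

        # POST /v1/batches/{batch_id}/cancel only makes sense while the batch is still moving.
        if batch["status"] in ("validating", "in_progress", "finalizing"):
            requests.post(f"{BASE_URL}/v1/batches/{batch['id']}/cancel")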
+ ListBatchesResponse: + type: object + properties: + object: + type: string + const: list + default: list + data: + type: array + items: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false + additionalProperties: false + required: + - object + - data + - has_more + title: ListBatchesResponse + description: >- + Response containing a list of batch objects. + CreateBatchRequest: + type: object + properties: + input_file_id: + type: string + description: >- + The ID of an uploaded file containing requests for the batch. + endpoint: + type: string + description: >- + The endpoint to be used for all requests in the batch. + completion_window: + type: string + const: 24h + description: >- + The time window within which the batch should be processed. + metadata: + type: object + additionalProperties: + type: string + description: Optional metadata for the batch. + idempotency_key: + type: string + description: >- + Optional idempotency key. When provided, enables idempotent behavior. 
+ additionalProperties: false + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch Order: type: string enum: @@ -13528,6 +13988,19 @@ tags: description: >- APIs for creating and interacting with agentic systems. x-displayName: Agents + - name: Batches + description: >- + The API is designed to allow use of openai client libraries for seamless integration. + + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + x-displayName: >- + The Batches API enables efficient processing of multiple requests in a single + operation, particularly useful for processing large datasets, batch evaluation + workflows, and cost-effective inference at scale. 
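Since the tag description above emphasizes compatibility with OpenAI client libraries, batch creation can be sketched with the openai Python package pointed at a Llama Stack server. The base URL, API key, and file ID are placeholders, and passing the idempotency_key extension through extra_body is an assumption rather than something the spec mandates; the field itself comes from CreateBatchRequest above.

    # Create a batch through the openai client; the Llama Stack idempotency extension
    # is supplied as an extra body field.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder URL/key

    batch = client.batches.create(
        input_file_id="file-abc123",          # hypothetical uploaded JSONL file of requests
        endpoint="/v1/chat/completions",      # applied to every request in the batch
        completion_window="24h",              # the schema only allows the constant "24h"
        metadata={"project": "nightly-eval"},
        extra_body={"idempotency_key": "nightly-eval-2024-06-01"},  # retries return the same batch
    )
    print(batch.id, batch.status)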
- name: Benchmarks description: '' - name: Conversations @@ -13602,6 +14075,7 @@ x-tagGroups: - name: Operations tags: - Agents + - Batches - Benchmarks - Conversations - DatasetIO diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index dca129631..626d332e1 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -1414,6 +1414,193 @@ "deprecated": true } }, + "/v1/openai/v1/batches": { + "get": { + "responses": { + "200": { + "description": "A list of batch objects.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBatchesResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "List all batches for the current user.", + "description": "List all batches for the current user.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "A cursor for pagination; returns batches after this batch ID.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "Number of batches to return (default 20, max 100).", + "required": true, + "schema": { + "type": "integer" + } + } + ], + "deprecated": true + }, + "post": { + "responses": { + "200": { + "description": "The created batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Create a new batch for processing multiple API requests.", + "description": "Create a new batch for processing multiple API requests.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBatchRequest" + } + } + }, + "required": true + }, + "deprecated": true + } + }, + "/v1/openai/v1/batches/{batch_id}": { + "get": { + "responses": { + "200": { + "description": "The batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Retrieve information about a specific batch.", + "description": "Retrieve information about a specific batch.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": true + } + }, + "/v1/openai/v1/batches/{batch_id}/cancel": { + "post": { + "responses": { + "200": { + "description": "The updated batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Cancel a batch that is in progress.", + "description": "Cancel a batch that is in progress.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": true + } + }, "/v1/openai/v1/chat/completions": { "get": { "responses": { @@ -6401,6 +6588,451 @@ "title": "Job", "description": "A job execution instance with status tracking." }, + "ListBatchesResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "const": "list", + "default": "list" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + "type": "integer" + }, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + 
"output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + "required": [ + "id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + } + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "has_more": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "object", + "data", + "has_more" + ], + "title": "ListBatchesResponse", + "description": "Response containing a list of batch objects." + }, + "CreateBatchRequest": { + "type": "object", + "properties": { + "input_file_id": { + "type": "string", + "description": "The ID of an uploaded file containing requests for the batch." + }, + "endpoint": { + "type": "string", + "description": "The endpoint to be used for all requests in the batch." + }, + "completion_window": { + "type": "string", + "const": "24h", + "description": "The time window within which the batch should be processed." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Optional metadata for the batch." + }, + "idempotency_key": { + "type": "string", + "description": "Optional idempotency key. When provided, enables idempotent behavior." + } + }, + "additionalProperties": false, + "required": [ + "input_file_id", + "endpoint", + "completion_window" + ], + "title": "CreateBatchRequest" + }, + "Batch": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + 
"additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + "type": "integer" + }, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + "output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + "required": [ + "id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + }, "Order": { "type": "string", "enum": [ @@ -13505,6 +14137,11 @@ "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n", "x-displayName": "Agents" }, + { + "name": "Batches", + "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.", + "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale." + }, { "name": "Benchmarks", "description": "" @@ -13555,6 +14192,7 @@ "name": "Operations", "tags": [ "Agents", + "Batches", "Benchmarks", "DatasetIO", "Datasets", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 12d1327a2..ec3880a6b 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1012,6 +1012,141 @@ paths: schema: type: string deprecated: true + /v1/openai/v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: List all batches for the current user. + description: List all batches for the current user. + parameters: + - name: after + in: query + description: >- + A cursor for pagination; returns batches after this batch ID. + required: false + schema: + type: string + - name: limit + in: query + description: >- + Number of batches to return (default 20, max 100). + required: true + schema: + type: integer + deprecated: true + post: + responses: + '200': + description: The created batch object. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Create a new batch for processing multiple API requests. + description: >- + Create a new batch for processing multiple API requests. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + required: true + deprecated: true + /v1/openai/v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Retrieve information about a specific batch. + description: >- + Retrieve information about a specific batch. + parameters: + - name: batch_id + in: path + description: The ID of the batch to retrieve. + required: true + schema: + type: string + deprecated: true + /v1/openai/v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel a batch that is in progress. + description: Cancel a batch that is in progress. + parameters: + - name: batch_id + in: path + description: The ID of the batch to cancel. + required: true + schema: + type: string + deprecated: true /v1/openai/v1/chat/completions: get: responses: @@ -4736,6 +4871,331 @@ components: title: Job description: >- A job execution instance with status tracking. 
+ ListBatchesResponse: + type: object + properties: + object: + type: string + const: list + default: list + data: + type: array + items: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false + additionalProperties: false + required: + - object + - data + - has_more + title: ListBatchesResponse + description: >- + Response containing a list of batch objects. + CreateBatchRequest: + type: object + properties: + input_file_id: + type: string + description: >- + The ID of an uploaded file containing requests for the batch. + endpoint: + type: string + description: >- + The endpoint to be used for all requests in the batch. + completion_window: + type: string + const: 24h + description: >- + The time window within which the batch should be processed. + metadata: + type: object + additionalProperties: + type: string + description: Optional metadata for the batch. + idempotency_key: + type: string + description: >- + Optional idempotency key. When provided, enables idempotent behavior. 
+ additionalProperties: false + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch Order: type: string enum: @@ -10263,6 +10723,19 @@ tags: - **Responses API**: Use the stable v1 Responses API endpoints x-displayName: Agents + - name: Batches + description: >- + The API is designed to allow use of openai client libraries for seamless integration. + + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + x-displayName: >- + The Batches API enables efficient processing of multiple requests in a single + operation, particularly useful for processing large datasets, batch evaluation + workflows, and cost-effective inference at scale. 
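The status enum in the Batch schema repeated above implies a straightforward polling loop: validating, in_progress, finalizing, and cancelling are transient, while completed, failed, expired, and cancelled are terminal. A sketch, with the client setup and batch ID as placeholders:

    # Poll GET /v1/batches/{batch_id} until the batch reaches a terminal status.
    import time
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder

    TERMINAL = {"completed", "failed", "expired", "cancelled"}

    def wait_for_batch(batch_id: str, interval: float = 10.0):
        while True:
            batch = client.batches.retrieve(batch_id)
            if batch.status in TERMINAL:
                return batch
            time.sleep(interval)  # still validating / in_progress / finalizing / cancelling

    done = wait_for_batch("batch_123")  # hypothetical ID
    print(done.status, done.request_counts)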
- name: Benchmarks description: '' - name: DatasetIO @@ -10308,6 +10781,7 @@ x-tagGroups: - name: Operations tags: - Agents + - Batches - Benchmarks - DatasetIO - Datasets diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 4a6dd4aa4..c1d3658f4 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -40,6 +40,193 @@ } ], "paths": { + "/v1/batches": { + "get": { + "responses": { + "200": { + "description": "A list of batch objects.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBatchesResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "List all batches for the current user.", + "description": "List all batches for the current user.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "A cursor for pagination; returns batches after this batch ID.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "Number of batches to return (default 20, max 100).", + "required": true, + "schema": { + "type": "integer" + } + } + ], + "deprecated": false + }, + "post": { + "responses": { + "200": { + "description": "The created batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Create a new batch for processing multiple API requests.", + "description": "Create a new batch for processing multiple API requests.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBatchRequest" + } + } + }, + "required": true + }, + "deprecated": false + } + }, + "/v1/batches/{batch_id}": { + "get": { + "responses": { + "200": { + "description": "The batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Retrieve information about a specific batch.", + "description": "Retrieve information about a specific batch.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": false + } + }, + "/v1/batches/{batch_id}/cancel": { + "post": { + "responses": { + "200": { + "description": "The updated batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" 
+ }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Cancel a batch that is in progress.", + "description": "Cancel a batch that is in progress.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": false + } + }, "/v1/chat/completions": { "get": { "responses": { @@ -4005,6 +4192,451 @@ "title": "Error", "description": "Error response from the API. Roughly follows RFC 7807." }, + "ListBatchesResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "const": "list", + "default": "list" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + "type": "integer" + }, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + "output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + 
"required": [ + "id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + } + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "has_more": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "object", + "data", + "has_more" + ], + "title": "ListBatchesResponse", + "description": "Response containing a list of batch objects." + }, + "CreateBatchRequest": { + "type": "object", + "properties": { + "input_file_id": { + "type": "string", + "description": "The ID of an uploaded file containing requests for the batch." + }, + "endpoint": { + "type": "string", + "description": "The endpoint to be used for all requests in the batch." + }, + "completion_window": { + "type": "string", + "const": "24h", + "description": "The time window within which the batch should be processed." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Optional metadata for the batch." + }, + "idempotency_key": { + "type": "string", + "description": "Optional idempotency key. When provided, enables idempotent behavior." + } + }, + "additionalProperties": false, + "required": [ + "input_file_id", + "endpoint", + "completion_window" + ], + "title": "CreateBatchRequest" + }, + "Batch": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + 
"type": "integer" + }, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + "output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + "required": [ + "id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + }, "Order": { "type": "string", "enum": [ @@ -13289,6 +13921,11 @@ "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`", "x-displayName": "Agents" }, + { + "name": "Batches", + "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.", + "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale." + }, { "name": "Conversations", "description": "Protocol for conversation management operations.", @@ -13362,6 +13999,7 @@ "name": "Operations", "tags": [ "Agents", + "Batches", "Conversations", "Files", "Inference", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 6dd1305c6..5b629a474 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -12,6 +12,141 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: List all batches for the current user. + description: List all batches for the current user. + parameters: + - name: after + in: query + description: >- + A cursor for pagination; returns batches after this batch ID. + required: false + schema: + type: string + - name: limit + in: query + description: >- + Number of batches to return (default 20, max 100). + required: true + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Create a new batch for processing multiple API requests. + description: >- + Create a new batch for processing multiple API requests. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + required: true + deprecated: false + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Retrieve information about a specific batch. + description: >- + Retrieve information about a specific batch. + parameters: + - name: batch_id + in: path + description: The ID of the batch to retrieve. + required: true + schema: + type: string + deprecated: false + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel a batch that is in progress. + description: Cancel a batch that is in progress. + parameters: + - name: batch_id + in: path + description: The ID of the batch to cancel. + required: true + schema: + type: string + deprecated: false /v1/chat/completions: get: responses: @@ -2999,6 +3134,331 @@ components: title: Error description: >- Error response from the API. Roughly follows RFC 7807. 
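A finished batch does not inline its results; the Batch schema defined below only carries output_file_id and error_file_id references. Fetching those files through the files API mirrors the OpenAI workflow and is assumed, not guaranteed by this spec, to work the same way here; the IDs below are placeholders.

    # Collect JSONL results after a batch reports status "completed".
    import json
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder

    batch = client.batches.retrieve("batch_123")  # hypothetical ID
    if batch.status == "completed" and batch.output_file_id:
        raw = client.files.content(batch.output_file_id).text
        for line in raw.splitlines():  # assumed OpenAI-style JSONL: one object per request
            result = json.loads(line)
            print(result.get("custom_id"), result.get("response", {}).get("status_code"))

    if batch.error_file_id:  # per-request failures are written to a separate file
        print(client.files.content(batch.error_file_id).text)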
+ ListBatchesResponse: + type: object + properties: + object: + type: string + const: list + default: list + data: + type: array + items: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false + additionalProperties: false + required: + - object + - data + - has_more + title: ListBatchesResponse + description: >- + Response containing a list of batch objects. + CreateBatchRequest: + type: object + properties: + input_file_id: + type: string + description: >- + The ID of an uploaded file containing requests for the batch. + endpoint: + type: string + description: >- + The endpoint to be used for all requests in the batch. + completion_window: + type: string + const: 24h + description: >- + The time window within which the batch should be processed. + metadata: + type: object + additionalProperties: + type: string + description: Optional metadata for the batch. + idempotency_key: + type: string + description: >- + Optional idempotency key. When provided, enables idempotent behavior. 
+ additionalProperties: false + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch Order: type: string enum: @@ -10144,6 +10604,19 @@ tags: - `background` x-displayName: Agents + - name: Batches + description: >- + The API is designed to allow use of openai client libraries for seamless integration. + + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + x-displayName: >- + The Batches API enables efficient processing of multiple requests in a single + operation, particularly useful for processing large datasets, batch evaluation + workflows, and cost-effective inference at scale. - name: Conversations description: >- Protocol for conversation management operations. 
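GET /v1/batches exposes after/limit cursor pagination, and the openai client walks that cursor automatically when its list result is iterated. A purely illustrative sketch that sweeps all batches and cancels anything still in flight, with the same placeholder client setup as before:

    # Iterate every batch via cursor pagination and cancel the ones still running.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder

    for batch in client.batches.list(limit=20):  # the SDK follows the `after` cursor for us
        if batch.status in ("validating", "in_progress", "finalizing"):
            client.batches.cancel(batch.id)      # POST /v1/batches/{batch_id}/cancel
            print(f"cancelled {batch.id}")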
@@ -10206,6 +10679,7 @@ x-tagGroups: - name: Operations tags: - Agents + - Batches - Conversations - Files - Inference diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 6b8f13f8f..59b6c28e7 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -40,6 +40,193 @@ } ], "paths": { + "/v1/batches": { + "get": { + "responses": { + "200": { + "description": "A list of batch objects.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBatchesResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "List all batches for the current user.", + "description": "List all batches for the current user.", + "parameters": [ + { + "name": "after", + "in": "query", + "description": "A cursor for pagination; returns batches after this batch ID.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "Number of batches to return (default 20, max 100).", + "required": true, + "schema": { + "type": "integer" + } + } + ], + "deprecated": false + }, + "post": { + "responses": { + "200": { + "description": "The created batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Create a new batch for processing multiple API requests.", + "description": "Create a new batch for processing multiple API requests.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateBatchRequest" + } + } + }, + "required": true + }, + "deprecated": false + } + }, + "/v1/batches/{batch_id}": { + "get": { + "responses": { + "200": { + "description": "The batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Retrieve information about a specific batch.", + "description": "Retrieve information about a specific batch.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": false + } + }, + "/v1/batches/{batch_id}/cancel": { + "post": { + "responses": { + "200": { + "description": "The updated batch object.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Batch" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": 
{ + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Batches" + ], + "summary": "Cancel a batch that is in progress.", + "description": "Cancel a batch that is in progress.", + "parameters": [ + { + "name": "batch_id", + "in": "path", + "description": "The ID of the batch to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "deprecated": false + } + }, "/v1/chat/completions": { "get": { "responses": { @@ -5677,6 +5864,451 @@ "title": "Error", "description": "Error response from the API. Roughly follows RFC 7807." }, + "ListBatchesResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "const": "list", + "default": "list" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + "type": "integer" + }, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + "output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + "required": [ + 
"id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + } + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "has_more": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "object", + "data", + "has_more" + ], + "title": "ListBatchesResponse", + "description": "Response containing a list of batch objects." + }, + "CreateBatchRequest": { + "type": "object", + "properties": { + "input_file_id": { + "type": "string", + "description": "The ID of an uploaded file containing requests for the batch." + }, + "endpoint": { + "type": "string", + "description": "The endpoint to be used for all requests in the batch." + }, + "completion_window": { + "type": "string", + "const": "24h", + "description": "The time window within which the batch should be processed." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Optional metadata for the batch." + }, + "idempotency_key": { + "type": "string", + "description": "Optional idempotency key. When provided, enables idempotent behavior." + } + }, + "additionalProperties": false, + "required": [ + "input_file_id", + "endpoint", + "completion_window" + ], + "title": "CreateBatchRequest" + }, + "Batch": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "completion_window": { + "type": "string" + }, + "created_at": { + "type": "integer" + }, + "endpoint": { + "type": "string" + }, + "input_file_id": { + "type": "string" + }, + "object": { + "type": "string", + "const": "batch" + }, + "status": { + "type": "string", + "enum": [ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled" + ] + }, + "cancelled_at": { + "type": "integer" + }, + "cancelling_at": { + "type": "integer" + }, + "completed_at": { + "type": "integer" + }, + "error_file_id": { + "type": "string" + }, + "errors": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "line": { + "type": "integer" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "BatchError" + } + }, + "object": { + "type": "string" + } + }, + "additionalProperties": false, + "title": "Errors" + }, + "expired_at": { + "type": "integer" + }, + "expires_at": { + "type": "integer" + }, + "failed_at": { + "type": "integer" + }, + "finalizing_at": { + "type": "integer" + }, + "in_progress_at": { + "type": "integer" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model": { + "type": "string" + }, + "output_file_id": { + "type": "string" + }, + "request_counts": { + "type": "object", + "properties": { + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" + }, + "total": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "completed", + "failed", + "total" + ], + "title": "BatchRequestCounts" + }, + "usage": { + "type": "object", + "properties": { + "input_tokens": { + "type": "integer" + }, + "input_tokens_details": { + "type": "object", + "properties": { + "cached_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "cached_tokens" + ], + "title": "InputTokensDetails" + }, + "output_tokens": { + "type": "integer" + 
}, + "output_tokens_details": { + "type": "object", + "properties": { + "reasoning_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "reasoning_tokens" + ], + "title": "OutputTokensDetails" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "input_tokens", + "input_tokens_details", + "output_tokens", + "output_tokens_details", + "total_tokens" + ], + "title": "BatchUsage" + } + }, + "additionalProperties": false, + "required": [ + "id", + "completion_window", + "created_at", + "endpoint", + "input_file_id", + "object", + "status" + ], + "title": "Batch" + }, "Order": { "type": "string", "enum": [ @@ -17961,6 +18593,11 @@ "description": "APIs for creating and interacting with agentic systems.", "x-displayName": "Agents" }, + { + "name": "Batches", + "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.", + "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale." + }, { "name": "Benchmarks", "description": "" @@ -18055,6 +18692,7 @@ "name": "Operations", "tags": [ "Agents", + "Batches", "Benchmarks", "Conversations", "DatasetIO", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index ce9daa26f..f531542c5 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -15,6 +15,141 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: List all batches for the current user. + description: List all batches for the current user. + parameters: + - name: after + in: query + description: >- + A cursor for pagination; returns batches after this batch ID. + required: false + schema: + type: string + - name: limit + in: query + description: >- + Number of batches to return (default 20, max 100). + required: true + schema: + type: integer + deprecated: false + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Create a new batch for processing multiple API requests. + description: >- + Create a new batch for processing multiple API requests. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + required: true + deprecated: false + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: >- + Retrieve information about a specific batch. + description: >- + Retrieve information about a specific batch. + parameters: + - name: batch_id + in: path + description: The ID of the batch to retrieve. + required: true + schema: + type: string + deprecated: false + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel a batch that is in progress. + description: Cancel a batch that is in progress. + parameters: + - name: batch_id + in: path + description: The ID of the batch to cancel. + required: true + schema: + type: string + deprecated: false /v1/chat/completions: get: responses: @@ -4212,6 +4347,331 @@ components: title: Error description: >- Error response from the API. Roughly follows RFC 7807. + ListBatchesResponse: + type: object + properties: + object: + type: string + const: list + default: list + data: + type: array + items: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + 
- total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false + additionalProperties: false + required: + - object + - data + - has_more + title: ListBatchesResponse + description: >- + Response containing a list of batch objects. + CreateBatchRequest: + type: object + properties: + input_file_id: + type: string + description: >- + The ID of an uploaded file containing requests for the batch. + endpoint: + type: string + description: >- + The endpoint to be used for all requests in the batch. + completion_window: + type: string + const: 24h + description: >- + The time window within which the batch should be processed. + metadata: + type: object + additionalProperties: + type: string + description: Optional metadata for the batch. + idempotency_key: + type: string + description: >- + Optional idempotency key. When provided, enables idempotent behavior. + additionalProperties: false + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + type: object + properties: + id: + type: string + completion_window: + type: string + created_at: + type: integer + endpoint: + type: string + input_file_id: + type: string + object: + type: string + const: batch + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + cancelled_at: + type: integer + cancelling_at: + type: integer + completed_at: + type: integer + error_file_id: + type: string + errors: + type: object + properties: + data: + type: array + items: + type: object + properties: + code: + type: string + line: + type: integer + message: + type: string + param: + type: string + additionalProperties: false + title: BatchError + object: + type: string + additionalProperties: false + title: Errors + expired_at: + type: integer + expires_at: + type: integer + failed_at: + type: integer + finalizing_at: + type: integer + in_progress_at: + type: integer + metadata: + type: object + additionalProperties: + type: string + model: + type: string + output_file_id: + type: string + request_counts: + type: object + properties: + completed: + type: integer + failed: + type: integer + total: + type: integer + additionalProperties: false + required: + - completed + - failed + - total + title: BatchRequestCounts + usage: + type: object + properties: + input_tokens: + type: integer + input_tokens_details: + type: object + properties: + cached_tokens: + type: integer + additionalProperties: false + required: + - cached_tokens + title: InputTokensDetails + output_tokens: + type: integer + output_tokens_details: + type: object + properties: + reasoning_tokens: + type: integer + additionalProperties: false + required: + - reasoning_tokens + title: OutputTokensDetails + total_tokens: + type: integer + additionalProperties: false + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + additionalProperties: false + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch Order: type: string enum: @@ -13528,6 +13988,19 @@ tags: description: >- APIs for creating and interacting with agentic systems. 
x-displayName: Agents + - name: Batches + description: >- + The API is designed to allow use of openai client libraries for seamless integration. + + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes. + x-displayName: >- + The Batches API enables efficient processing of multiple requests in a single + operation, particularly useful for processing large datasets, batch evaluation + workflows, and cost-effective inference at scale. - name: Benchmarks description: '' - name: Conversations @@ -13602,6 +14075,7 @@ x-tagGroups: - name: Operations tags: - Agents + - Batches - Benchmarks - Conversations - DatasetIO diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index eccc562ae..ac0a440df 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -14,6 +14,7 @@ from typing import Any import yaml from llama_stack.apis.agents import Agents +from llama_stack.apis.batches import Batches from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.conversations import Conversations from llama_stack.apis.datasetio import DatasetIO @@ -63,6 +64,7 @@ class LlamaStack( Providers, Inference, Agents, + Batches, Safety, SyntheticDataGeneration, Datasets,
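For reviewers, a minimal end-to-end sketch of the Batches surface added above, exercised through the openai Python client against a Llama Stack server. The base URL, API key, model id, and the JSONL input-file layout are illustrative assumptions for this sketch, not something this diff defines.

# batches_example.py - illustrative only; assumes an OpenAI-compatible
# Llama Stack server at http://localhost:8321/v1 and a JSONL input file
# following the OpenAI batch input convention (custom_id/method/url/body).
import json
import time

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Write two chat-completion requests to a JSONL file (format assumed).
requests = [
    {
        "custom_id": f"req-{i}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "llama3.2:3b",  # placeholder model id
            "messages": [{"role": "user", "content": f"Say hello #{i}"}],
        },
    }
    for i in range(2)
]
with open("batch_input.jsonl", "w") as f:
    f.write("\n".join(json.dumps(r) for r in requests))

# Upload the input file, then create a batch (POST /v1/batches).
input_file = client.files.create(file=open("batch_input.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # only accepted value per CreateBatchRequest
    metadata={"job": "demo"},
)

# Poll GET /v1/batches/{batch_id} until the batch reaches a terminal status.
while batch.status in ("validating", "in_progress", "finalizing"):
    time.sleep(5)
    batch = client.batches.retrieve(batch.id)
print(batch.status, batch.request_counts)

# GET /v1/batches lists batches; POST /v1/batches/{batch_id}/cancel would be
# client.batches.cancel(batch.id).
print([b.id for b in client.batches.list(limit=20).data])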
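The idempotency_key field on CreateBatchRequest is a Llama Stack extension rather than a parameter of the stock OpenAI client method, so a client would likely pass it out of band. A hedged sketch using the openai SDK's extra_body escape hatch, reusing the client and file from the sketch above; the key value is made up:

batch = client.batches.create(
    input_file_id=input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    extra_body={"idempotency_key": "nightly-eval-2024-01-01"},  # hypothetical key
)
# Re-issuing the same request with the same key should return the existing
# batch rather than creating a duplicate, per the "idempotent batch creation"
# extension called out in the Batches tag description.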