mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 05:20:00 +00:00
updates
This commit is contained in:
parent
0cfb2e2473
commit
73d927850e
4 changed files with 43 additions and 316 deletions
137
docs/_static/llama-stack-spec.yaml
vendored
137
docs/_static/llama-stack-spec.yaml
vendored
|
|
@ -69,35 +69,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
/v1/batch-inference/chat-completion-inline:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionInlineRequest'
|
||||
required: true
|
||||
/v1/inference/batch-completion:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -127,35 +98,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
/v1/batch-inference/completion-inline:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionInlineRequest'
|
||||
required: true
|
||||
/v1/post-training/job/cancel:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -206,7 +148,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
- BatchInference (Coming Soon)
|
||||
description: >-
|
||||
Generate a chat completion for the given messages using the specified model.
|
||||
parameters: []
|
||||
|
|
@ -241,7 +183,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
- BatchInference (Coming Soon)
|
||||
description: >-
|
||||
Generate a completion for the given content using the specified model.
|
||||
parameters: []
|
||||
|
|
@ -3346,42 +3288,6 @@ components:
|
|||
- logprobs_by_token
|
||||
title: TokenLogProbs
|
||||
description: Log probabilities for generated tokens.
|
||||
BatchChatCompletionInlineRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
messages_batch:
|
||||
type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Message'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
How many tokens (for each position) to return log probabilities for.
|
||||
additionalProperties: false
|
||||
title: LogProbConfig
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- messages_batch
|
||||
title: BatchChatCompletionInlineRequest
|
||||
BatchCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -3450,34 +3356,6 @@ components:
|
|||
- stop_reason
|
||||
title: CompletionResponse
|
||||
description: Response from a completion request.
|
||||
BatchCompletionInlineRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
content_batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
How many tokens (for each position) to return log probabilities for.
|
||||
additionalProperties: false
|
||||
title: LogProbConfig
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- content_batch
|
||||
title: BatchCompletionInlineRequest
|
||||
CancelTrainingJobRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -7737,6 +7615,17 @@ tags:
|
|||
x-displayName: >-
|
||||
Agents API for creating and interacting with agentic systems.
|
||||
- name: BatchInference (Coming Soon)
|
||||
description: >-
|
||||
This is an asynchronous API. If the request is successful, the response will
|
||||
be a job which can be polled for completion.
|
||||
|
||||
|
||||
NOTE: This API is not yet implemented and is subject to change in concert with
|
||||
other asynchronous APIs
|
||||
|
||||
including (post-training, evals, etc).
|
||||
x-displayName: >-
|
||||
Batch inference API for generating completions and chat completions.
|
||||
- name: Benchmarks
|
||||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue