mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
added more docs
This commit is contained in:
parent
6778359493
commit
b88f8ad616
3 changed files with 102 additions and 100 deletions
|
@ -143,22 +143,26 @@ class BatchChatCompletionRequest:
|
|||
|
||||
|
||||
class Inference(Protocol):
|
||||
"""Set of methods that can be called on the inference service."""
|
||||
|
||||
@webmethod(route="/inference/completion")
|
||||
def post_completion(
|
||||
self,
|
||||
request: CompletionRequest,
|
||||
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
|
||||
|
||||
@webmethod(route="/inference/chat_completion")
|
||||
def post_chat_completion(
|
||||
self,
|
||||
request: ChatCompletionRequest,
|
||||
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
||||
|
||||
@webmethod(route="/inference/batch_completion")
|
||||
def post_batch_completion(
|
||||
self,
|
||||
request: BatchCompletionRequest,
|
||||
) -> List[CompletionResponse]: ...
|
||||
|
||||
@webmethod(route="/inference/batch_chat_completion")
|
||||
def post_batch_chat_completion(
|
||||
self,
|
||||
request: BatchChatCompletionRequest,
|
||||
|
|
|
@ -386,7 +386,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/batch_chat_completion": {
|
||||
"/inference/batch_chat_completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -416,7 +416,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/batch_completion": {
|
||||
"/inference/batch_completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -446,7 +446,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/chat_completion": {
|
||||
"/inference/chat_completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -483,7 +483,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/completion": {
|
||||
"/inference/completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -3317,27 +3317,26 @@
|
|||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"name": "RewardScoring"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"x-displayName": "Set of methods that can be called on the inference service."
|
||||
},
|
||||
{
|
||||
"name": "MemoryBanks"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "AgenticSystem"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "SyntheticDataGeneration"
|
||||
},
|
||||
{
|
||||
"name": "RewardScoring"
|
||||
"name": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "MemoryBanks"
|
||||
},
|
||||
{
|
||||
"name": "ShieldConfig",
|
||||
|
|
|
@ -1563,83 +1563,6 @@ paths:
|
|||
agent execution response.
|
||||
tags:
|
||||
- AgenticSystem
|
||||
/batch_chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- Inference
|
||||
/batch_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- Inference
|
||||
/chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ChatCompletionResponse'
|
||||
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||
description: Normal chat completion response. **OR** Streamed chat completion
|
||||
response. The actual response is a series of such objects.
|
||||
tags:
|
||||
- Inference
|
||||
/completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/CompletionResponse'
|
||||
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||
description: Normal completion response. **OR** streamed completion response.
|
||||
tags:
|
||||
- Inference
|
||||
/datasets/create:
|
||||
post:
|
||||
parameters: []
|
||||
|
@ -1684,6 +1607,83 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- Datasets
|
||||
/inference/batch_chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- Inference
|
||||
/inference/batch_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- Inference
|
||||
/inference/chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ChatCompletionResponse'
|
||||
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||
description: Normal chat completion response. **OR** Streamed chat completion
|
||||
response. The actual response is a series of such objects.
|
||||
tags:
|
||||
- Inference
|
||||
/inference/completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/CompletionResponse'
|
||||
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||
description: Normal completion response. **OR** streamed completion response.
|
||||
tags:
|
||||
- Inference
|
||||
/memory_bank/delete:
|
||||
post:
|
||||
parameters:
|
||||
|
@ -2015,14 +2015,13 @@ security:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
tags:
|
||||
- name: PostTraining
|
||||
- name: Inference
|
||||
x-displayName: Set of methods that can be called on the inference service.
|
||||
- name: MemoryBanks
|
||||
- name: Datasets
|
||||
- name: AgenticSystem
|
||||
- name: SyntheticDataGeneration
|
||||
- name: RewardScoring
|
||||
- name: PostTraining
|
||||
- name: AgenticSystem
|
||||
- name: Datasets
|
||||
- name: SyntheticDataGeneration
|
||||
- name: Inference
|
||||
- name: MemoryBanks
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
||||
name: ShieldConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue