mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
added more docs
This commit is contained in:
parent
6778359493
commit
b88f8ad616
3 changed files with 102 additions and 100 deletions
|
@ -143,22 +143,26 @@ class BatchChatCompletionRequest:
|
||||||
|
|
||||||
|
|
||||||
class Inference(Protocol):
|
class Inference(Protocol):
|
||||||
"""Set of methods that can be called on the inference service."""
|
|
||||||
|
@webmethod(route="/inference/completion")
|
||||||
def post_completion(
|
def post_completion(
|
||||||
self,
|
self,
|
||||||
request: CompletionRequest,
|
request: CompletionRequest,
|
||||||
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
|
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
|
||||||
|
|
||||||
|
@webmethod(route="/inference/chat_completion")
|
||||||
def post_chat_completion(
|
def post_chat_completion(
|
||||||
self,
|
self,
|
||||||
request: ChatCompletionRequest,
|
request: ChatCompletionRequest,
|
||||||
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
|
||||||
|
|
||||||
|
@webmethod(route="/inference/batch_completion")
|
||||||
def post_batch_completion(
|
def post_batch_completion(
|
||||||
self,
|
self,
|
||||||
request: BatchCompletionRequest,
|
request: BatchCompletionRequest,
|
||||||
) -> List[CompletionResponse]: ...
|
) -> List[CompletionResponse]: ...
|
||||||
|
|
||||||
|
@webmethod(route="/inference/batch_chat_completion")
|
||||||
def post_batch_chat_completion(
|
def post_batch_chat_completion(
|
||||||
self,
|
self,
|
||||||
request: BatchChatCompletionRequest,
|
request: BatchChatCompletionRequest,
|
||||||
|
|
|
@ -386,7 +386,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/batch_chat_completion": {
|
"/inference/batch_chat_completion": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -416,7 +416,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/batch_completion": {
|
"/inference/batch_completion": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -446,7 +446,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/chat_completion": {
|
"/inference/chat_completion": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -483,7 +483,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/completion": {
|
"/inference/completion": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -3317,27 +3317,26 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
|
{
|
||||||
|
"name": "RewardScoring"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "PostTraining"
|
"name": "PostTraining"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Inference",
|
|
||||||
"x-displayName": "Set of methods that can be called on the inference service."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MemoryBanks"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Datasets"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "AgenticSystem"
|
"name": "AgenticSystem"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Datasets"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGeneration"
|
"name": "SyntheticDataGeneration"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "RewardScoring"
|
"name": "Inference"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "MemoryBanks"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ShieldConfig",
|
"name": "ShieldConfig",
|
||||||
|
|
|
@ -1563,83 +1563,6 @@ paths:
|
||||||
agent execution response.
|
agent execution response.
|
||||||
tags:
|
tags:
|
||||||
- AgenticSystem
|
- AgenticSystem
|
||||||
/batch_chat_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/jsonl:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Inference
|
|
||||||
/batch_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/jsonl:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CompletionResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Inference
|
|
||||||
/chat_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/ChatCompletionResponse'
|
|
||||||
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
|
||||||
description: Normal chat completion response. **OR** Streamed chat completion
|
|
||||||
response. The actual response is a series of such objects.
|
|
||||||
tags:
|
|
||||||
- Inference
|
|
||||||
/completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/CompletionResponse'
|
|
||||||
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
|
||||||
description: Normal completion response. **OR** streamed completion response.
|
|
||||||
tags:
|
|
||||||
- Inference
|
|
||||||
/datasets/create:
|
/datasets/create:
|
||||||
post:
|
post:
|
||||||
parameters: []
|
parameters: []
|
||||||
|
@ -1684,6 +1607,83 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Datasets
|
- Datasets
|
||||||
|
/inference/batch_chat_completion:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/jsonl:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
/inference/batch_completion:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/jsonl:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/CompletionResponse'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
/inference/chat_completion:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ChatCompletionRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/ChatCompletionResponse'
|
||||||
|
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||||
|
description: Normal chat completion response. **OR** Streamed chat completion
|
||||||
|
response. The actual response is a series of such objects.
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
/inference/completion:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/CompletionRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/CompletionResponse'
|
||||||
|
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||||
|
description: Normal completion response. **OR** streamed completion response.
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
/memory_bank/delete:
|
/memory_bank/delete:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -2015,14 +2015,13 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
- name: PostTraining
|
|
||||||
- name: Inference
|
|
||||||
x-displayName: Set of methods that can be called on the inference service.
|
|
||||||
- name: MemoryBanks
|
|
||||||
- name: Datasets
|
|
||||||
- name: AgenticSystem
|
|
||||||
- name: SyntheticDataGeneration
|
|
||||||
- name: RewardScoring
|
- name: RewardScoring
|
||||||
|
- name: PostTraining
|
||||||
|
- name: AgenticSystem
|
||||||
|
- name: Datasets
|
||||||
|
- name: SyntheticDataGeneration
|
||||||
|
- name: Inference
|
||||||
|
- name: MemoryBanks
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
||||||
name: ShieldConfig
|
name: ShieldConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue