added more docs

This commit is contained in:
Raghotham Murthy 2024-07-11 01:38:04 -07:00
parent 6778359493
commit b88f8ad616
3 changed files with 102 additions and 100 deletions

View file

@ -143,22 +143,26 @@ class BatchChatCompletionRequest:
class Inference(Protocol): class Inference(Protocol):
"""Set of methods that can be called on the inference service."""
@webmethod(route="/inference/completion")
def post_completion( def post_completion(
self, self,
request: CompletionRequest, request: CompletionRequest,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
@webmethod(route="/inference/chat_completion")
def post_chat_completion( def post_chat_completion(
self, self,
request: ChatCompletionRequest, request: ChatCompletionRequest,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
@webmethod(route="/inference/batch_completion")
def post_batch_completion( def post_batch_completion(
self, self,
request: BatchCompletionRequest, request: BatchCompletionRequest,
) -> List[CompletionResponse]: ... ) -> List[CompletionResponse]: ...
@webmethod(route="/inference/batch_chat_completion")
def post_batch_chat_completion( def post_batch_chat_completion(
self, self,
request: BatchChatCompletionRequest, request: BatchChatCompletionRequest,

View file

@ -386,7 +386,7 @@
] ]
} }
}, },
"/batch_chat_completion": { "/inference/batch_chat_completion": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -416,7 +416,7 @@
} }
} }
}, },
"/batch_completion": { "/inference/batch_completion": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -446,7 +446,7 @@
} }
} }
}, },
"/chat_completion": { "/inference/chat_completion": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -483,7 +483,7 @@
} }
} }
}, },
"/completion": { "/inference/completion": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -3317,27 +3317,26 @@
} }
], ],
"tags": [ "tags": [
{
"name": "RewardScoring"
},
{ {
"name": "PostTraining" "name": "PostTraining"
}, },
{
"name": "Inference",
"x-displayName": "Set of methods that can be called on the inference service."
},
{
"name": "MemoryBanks"
},
{
"name": "Datasets"
},
{ {
"name": "AgenticSystem" "name": "AgenticSystem"
}, },
{
"name": "Datasets"
},
{ {
"name": "SyntheticDataGeneration" "name": "SyntheticDataGeneration"
}, },
{ {
"name": "RewardScoring" "name": "Inference"
},
{
"name": "MemoryBanks"
}, },
{ {
"name": "ShieldConfig", "name": "ShieldConfig",

View file

@ -1563,83 +1563,6 @@ paths:
agent execution response. agent execution response.
tags: tags:
- AgenticSystem - AgenticSystem
/batch_chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/ChatCompletionResponse'
description: OK
tags:
- Inference
/batch_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/CompletionResponse'
description: OK
tags:
- Inference
/chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/ChatCompletionResponse'
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion
response. The actual response is a series of such objects.
tags:
- Inference
/completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/CompletionResponse'
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
description: Normal completion response. **OR** streamed completion response.
tags:
- Inference
/datasets/create: /datasets/create:
post: post:
parameters: [] parameters: []
@ -1684,6 +1607,83 @@ paths:
description: OK description: OK
tags: tags:
- Datasets - Datasets
/inference/batch_chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/ChatCompletionResponse'
description: OK
tags:
- Inference
/inference/batch_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/CompletionResponse'
description: OK
tags:
- Inference
/inference/chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/ChatCompletionResponse'
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: Normal chat completion response. **OR** Streamed chat completion
response. The actual response is a series of such objects.
tags:
- Inference
/inference/completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/CompletionResponse'
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
description: Normal completion response. **OR** streamed completion response.
tags:
- Inference
/memory_bank/delete: /memory_bank/delete:
post: post:
parameters: parameters:
@ -2015,14 +2015,13 @@ security:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
tags: tags:
- name: PostTraining
- name: Inference
x-displayName: Set of methods that can be called on the inference service.
- name: MemoryBanks
- name: Datasets
- name: AgenticSystem
- name: SyntheticDataGeneration
- name: RewardScoring - name: RewardScoring
- name: PostTraining
- name: AgenticSystem
- name: Datasets
- name: SyntheticDataGeneration
- name: Inference
- name: MemoryBanks
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" /> - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"