From b88f8ad6168fac6c0df4d00b629c69b35f94a7ad Mon Sep 17 00:00:00 2001 From: Raghotham Murthy Date: Thu, 11 Jul 2024 01:38:04 -0700 Subject: [PATCH] added more docs --- source/api_definitions.py | 6 +- source/openapi.html | 29 ++++--- source/openapi.yaml | 167 +++++++++++++++++++------------------- 3 files changed, 102 insertions(+), 100 deletions(-) diff --git a/source/api_definitions.py b/source/api_definitions.py index 84e0954f7..626fa0bdd 100644 --- a/source/api_definitions.py +++ b/source/api_definitions.py @@ -143,22 +143,26 @@ class BatchChatCompletionRequest: class Inference(Protocol): - """Set of methods that can be called on the inference service.""" + + @webmethod(route="/inference/completion") def post_completion( self, request: CompletionRequest, ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... + @webmethod(route="/inference/chat_completion") def post_chat_completion( self, request: ChatCompletionRequest, ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... + @webmethod(route="/inference/batch_completion") def post_batch_completion( self, request: BatchCompletionRequest, ) -> List[CompletionResponse]: ... + @webmethod(route="/inference/batch_chat_completion") def post_batch_chat_completion( self, request: BatchChatCompletionRequest, diff --git a/source/openapi.html b/source/openapi.html index a4b8e7ca7..ceb92c59f 100644 --- a/source/openapi.html +++ b/source/openapi.html @@ -386,7 +386,7 @@ ] } }, - "/batch_chat_completion": { + "/inference/batch_chat_completion": { "post": { "responses": { "200": { @@ -416,7 +416,7 @@ } } }, - "/batch_completion": { + "/inference/batch_completion": { "post": { "responses": { "200": { @@ -446,7 +446,7 @@ } } }, - "/chat_completion": { + "/inference/chat_completion": { "post": { "responses": { "200": { @@ -483,7 +483,7 @@ } } }, - "/completion": { + "/inference/completion": { "post": { "responses": { "200": { @@ -3317,27 +3317,26 @@ } ], "tags": [ + { + "name": "RewardScoring" + }, { "name": "PostTraining" }, - { - "name": "Inference", - "x-displayName": "Set of methods that can be called on the inference service." - }, - { - "name": "MemoryBanks" - }, - { - "name": "Datasets" - }, { "name": "AgenticSystem" }, + { + "name": "Datasets" + }, { "name": "SyntheticDataGeneration" }, { - "name": "RewardScoring" + "name": "Inference" + }, + { + "name": "MemoryBanks" }, { "name": "ShieldConfig", diff --git a/source/openapi.yaml b/source/openapi.yaml index e14d457e2..dc25460bf 100644 --- a/source/openapi.yaml +++ b/source/openapi.yaml @@ -1563,83 +1563,6 @@ paths: agent execution response. tags: - AgenticSystem - /batch_chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchChatCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/ChatCompletionResponse' - description: OK - tags: - - Inference - /batch_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/CompletionResponse' - description: OK - tags: - - Inference - /chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/ChatCompletionResponse' - - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' - description: Normal chat completion response. **OR** Streamed chat completion - response. The actual response is a series of such objects. - tags: - - Inference - /completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/CompletionResponse' - - $ref: '#/components/schemas/CompletionResponseStreamChunk' - description: Normal completion response. **OR** streamed completion response. - tags: - - Inference /datasets/create: post: parameters: [] @@ -1684,6 +1607,83 @@ paths: description: OK tags: - Datasets + /inference/batch_chat_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/BatchChatCompletionRequest' + required: true + responses: + '200': + content: + application/jsonl: + schema: + $ref: '#/components/schemas/ChatCompletionResponse' + description: OK + tags: + - Inference + /inference/batch_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/BatchCompletionRequest' + required: true + responses: + '200': + content: + application/jsonl: + schema: + $ref: '#/components/schemas/CompletionResponse' + description: OK + tags: + - Inference + /inference/chat_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionRequest' + required: true + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ChatCompletionResponse' + - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + description: Normal chat completion response. **OR** Streamed chat completion + response. The actual response is a series of such objects. + tags: + - Inference + /inference/completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CompletionRequest' + required: true + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/CompletionResponse' + - $ref: '#/components/schemas/CompletionResponseStreamChunk' + description: Normal completion response. **OR** streamed completion response. + tags: + - Inference /memory_bank/delete: post: parameters: @@ -2015,14 +2015,13 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: PostTraining -- name: Inference - x-displayName: Set of methods that can be called on the inference service. -- name: MemoryBanks -- name: Datasets -- name: AgenticSystem -- name: SyntheticDataGeneration - name: RewardScoring +- name: PostTraining +- name: AgenticSystem +- name: Datasets +- name: SyntheticDataGeneration +- name: Inference +- name: MemoryBanks - description: name: ShieldConfig - description: