From 2b6307467642e00d604480abc523fd989330e03d Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Tue, 10 Sep 2024 01:14:11 -0400 Subject: [PATCH] add /inference/chat_completion to SSE special case --- .../llama-stack-spec.html | 36 +++++++++---------- .../llama-stack-spec.yaml | 18 +++++----- rfcs/openapi_generator/generate.py | 5 ++- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html index a05dc34fa..f5f5fa154 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639" }, "servers": [ { @@ -141,7 +141,7 @@ "200": { "description": "SSE-stream of these events.", "content": { - "application/json": { + "text/event-stream": { "schema": { "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" } @@ -6062,35 +6062,35 @@ } ], "tags": [ - { - "name": "Inference" - }, - { - "name": "Datasets" - }, - { - "name": "Evaluations" - }, - { - "name": "Memory" - }, { "name": "SyntheticDataGeneration" }, - { - "name": "PostTraining" - }, { "name": "RewardScoring" }, + { + "name": "Datasets" + }, + { + "name": "Memory" + }, + { + "name": "AgenticSystem" + }, { "name": "BatchInference" }, + { + "name": "PostTraining" + }, + { + "name": "Evaluations" + }, { "name": "Telemetry" }, { - "name": "AgenticSystem" + "name": "Inference" }, { "name": "BatchChatCompletionRequest", diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml index cfd01b291..3c3475fff 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml @@ -2777,7 +2777,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375" + \ draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -3302,7 +3302,7 @@ paths: responses: '200': content: - application/json: + text/event-stream: schema: $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' description: SSE-stream of these events. @@ -3729,16 +3729,16 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: Inference -- name: Datasets -- name: Evaluations -- name: Memory - name: SyntheticDataGeneration -- name: PostTraining - name: RewardScoring -- name: BatchInference -- name: Telemetry +- name: Datasets +- name: Memory - name: AgenticSystem +- name: BatchInference +- name: PostTraining +- name: Evaluations +- name: Telemetry +- name: Inference - description: name: BatchChatCompletionRequest diff --git a/rfcs/openapi_generator/generate.py b/rfcs/openapi_generator/generate.py index ab9774e70..279389a47 100644 --- a/rfcs/openapi_generator/generate.py +++ b/rfcs/openapi_generator/generate.py @@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack # TODO: this should be fixed in the generator itself so it reads appropriate annotations -STREAMING_ENDPOINTS = ["/agentic_system/turn/create"] +STREAMING_ENDPOINTS = [ + "/agentic_system/turn/create", + "/inference/chat_completion", +] def patch_sse_stream_responses(spec: Specification):