add /inference/chat_completion to SSE special case

This commit is contained in:
Dalton Flanagan 2024-09-10 01:14:11 -04:00
parent 4f021de10f
commit 2b63074676
3 changed files with 31 additions and 28 deletions

View file

@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
},
"servers": [
{
@@ -141,7 +141,7 @@
"200": {
"description": "SSE-stream of these events.",
"content": {
"application/json": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
}
@@ -6062,35 +6062,35 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Datasets"
},
{
"name": "Evaluations"
},
{
"name": "Memory"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "PostTraining"
},
{
"name": "RewardScoring"
},
{
"name": "Datasets"
},
{
"name": "Memory"
},
{
"name": "AgenticSystem"
},
{
"name": "BatchInference"
},
{
"name": "PostTraining"
},
{
"name": "Evaluations"
},
{
"name": "Telemetry"
},
{
"name": "AgenticSystem"
"name": "Inference"
},
{
"name": "BatchChatCompletionRequest",

View file

@@ -2777,7 +2777,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
\ draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3302,7 +3302,7 @@ paths:
responses:
'200':
content:
application/json:
text/event-stream:
schema:
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: SSE-stream of these events.
@@ -3729,16 +3729,16 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Datasets
- name: Evaluations
- name: Memory
- name: SyntheticDataGeneration
- name: PostTraining
- name: RewardScoring
- name: BatchInference
- name: Telemetry
- name: Datasets
- name: Memory
- name: AgenticSystem
- name: BatchInference
- name: PostTraining
- name: Evaluations
- name: Telemetry
- name: Inference
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest

View file

@@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack
# TODO: this should be fixed in the generator itself so it reads appropriate annotations
STREAMING_ENDPOINTS = ["/agentic_system/turn/create"]
STREAMING_ENDPOINTS = [
"/agentic_system/turn/create",
"/inference/chat_completion",
]
def patch_sse_stream_responses(spec: Specification):