add /inference/chat_completion to SSE special case

This commit is contained in:
Dalton Flanagan 2024-09-10 01:14:11 -04:00
parent 4f021de10f
commit 2b63074676
3 changed files with 31 additions and 28 deletions

View file

@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
},
"servers": [
{
@@ -141,7 +141,7 @@
"200": {
"description": "SSE-stream of these events.",
"content": {
"application/json": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
}
@@ -6062,35 +6062,35 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Datasets"
},
{
"name": "Evaluations"
},
{
"name": "Memory"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "PostTraining"
},
{
"name": "RewardScoring"
},
{
"name": "Datasets"
},
{
"name": "Memory"
},
{
"name": "AgenticSystem"
},
{
"name": "BatchInference"
},
{
"name": "PostTraining"
},
{
"name": "Evaluations"
},
{
"name": "Telemetry"
},
{
"name": "AgenticSystem"
"name": "Inference"
},
{
"name": "BatchChatCompletionRequest",

View file

@@ -2777,7 +2777,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
\ draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3302,7 +3302,7 @@ paths:
responses:
'200':
content:
application/json:
text/event-stream:
schema:
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: SSE-stream of these events.
@@ -3729,16 +3729,16 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Datasets
- name: Evaluations
- name: Memory
- name: SyntheticDataGeneration
- name: PostTraining
- name: RewardScoring
- name: BatchInference
- name: Telemetry
- name: Datasets
- name: Memory
- name: AgenticSystem
- name: BatchInference
- name: PostTraining
- name: Evaluations
- name: Telemetry
- name: Inference
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest

View file

@@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack
# TODO: this should be fixed in the generator itself so it reads appropriate annotations
STREAMING_ENDPOINTS = ["/agentic_system/turn/create"]
STREAMING_ENDPOINTS = [
"/agentic_system/turn/create",
"/inference/chat_completion",
]
def patch_sse_stream_responses(spec: Specification):