mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
add /inference/chat_completion to SSE special case
This commit is contained in:
parent
4f021de10f
commit
2b63074676
3 changed files with 31 additions and 28 deletions
|
@ -21,7 +21,7 @@
|
|||
"info": {
|
||||
"title": "[DRAFT] Llama Stack Specification",
|
||||
"version": "0.0.1",
|
||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
|
||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
|
||||
},
|
||||
"servers": [
|
||||
{
|
||||
|
@ -141,7 +141,7 @@
|
|||
"200": {
|
||||
"description": "SSE-stream of these events.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
|
||||
}
|
||||
|
@ -6062,35 +6062,35 @@
|
|||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"name": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Memory"
|
||||
},
|
||||
{
|
||||
"name": "SyntheticDataGeneration"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining"
|
||||
},
|
||||
{
|
||||
"name": "RewardScoring"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "Memory"
|
||||
},
|
||||
{
|
||||
"name": "AgenticSystem"
|
||||
},
|
||||
{
|
||||
"name": "BatchInference"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining"
|
||||
},
|
||||
{
|
||||
"name": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Telemetry"
|
||||
},
|
||||
{
|
||||
"name": "AgenticSystem"
|
||||
"name": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "BatchChatCompletionRequest",
|
||||
|
|
|
@ -2777,7 +2777,7 @@ info:
|
|||
description: "This is the specification of the llama stack that provides\n \
|
||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||
\ to\n best leverage Llama Models. The specification is still in\
|
||||
\ draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
|
||||
\ draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
|
||||
title: '[DRAFT] Llama Stack Specification'
|
||||
version: 0.0.1
|
||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||
|
@ -3302,7 +3302,7 @@ paths:
|
|||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
text/event-stream:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||
description: SSE-stream of these events.
|
||||
|
@ -3729,16 +3729,16 @@ security:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
tags:
|
||||
- name: Inference
|
||||
- name: Datasets
|
||||
- name: Evaluations
|
||||
- name: Memory
|
||||
- name: SyntheticDataGeneration
|
||||
- name: PostTraining
|
||||
- name: RewardScoring
|
||||
- name: BatchInference
|
||||
- name: Telemetry
|
||||
- name: Datasets
|
||||
- name: Memory
|
||||
- name: AgenticSystem
|
||||
- name: BatchInference
|
||||
- name: PostTraining
|
||||
- name: Evaluations
|
||||
- name: Telemetry
|
||||
- name: Inference
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
||||
/>
|
||||
name: BatchChatCompletionRequest
|
||||
|
|
|
@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack
|
|||
|
||||
|
||||
# TODO: this should be fixed in the generator itself so it reads appropriate annotations
|
||||
STREAMING_ENDPOINTS = ["/agentic_system/turn/create"]
|
||||
STREAMING_ENDPOINTS = [
|
||||
"/agentic_system/turn/create",
|
||||
"/inference/chat_completion",
|
||||
]
|
||||
|
||||
|
||||
def patch_sse_stream_responses(spec: Specification):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue