mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
add /inference/chat_completion to SSE special case
This commit is contained in:
parent
4f021de10f
commit
2b63074676
3 changed files with 31 additions and 28 deletions
|
@ -21,7 +21,7 @@
|
||||||
"info": {
|
"info": {
|
||||||
"title": "[DRAFT] Llama Stack Specification",
|
"title": "[DRAFT] Llama Stack Specification",
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
|
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
|
||||||
},
|
},
|
||||||
"servers": [
|
"servers": [
|
||||||
{
|
{
|
||||||
|
@ -141,7 +141,7 @@
|
||||||
"200": {
|
"200": {
|
||||||
"description": "SSE-stream of these events.",
|
"description": "SSE-stream of these events.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"text/event-stream": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
|
"$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
|
||||||
}
|
}
|
||||||
|
@ -6062,35 +6062,35 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
{
|
|
||||||
"name": "Inference"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Datasets"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Evaluations"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Memory"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGeneration"
|
"name": "SyntheticDataGeneration"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "PostTraining"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "RewardScoring"
|
"name": "RewardScoring"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Datasets"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Memory"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "AgenticSystem"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "BatchInference"
|
"name": "BatchInference"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "PostTraining"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Evaluations"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Telemetry"
|
"name": "Telemetry"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "AgenticSystem"
|
"name": "Inference"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "BatchChatCompletionRequest",
|
"name": "BatchChatCompletionRequest",
|
||||||
|
|
|
@ -2777,7 +2777,7 @@ info:
|
||||||
description: "This is the specification of the llama stack that provides\n \
|
description: "This is the specification of the llama stack that provides\n \
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||||
\ to\n best leverage Llama Models. The specification is still in\
|
\ to\n best leverage Llama Models. The specification is still in\
|
||||||
\ draft and subject to change.\n Generated at 2024-09-09 11:19:39.855375"
|
\ draft and subject to change.\n Generated at 2024-09-10 01:13:08.531639"
|
||||||
title: '[DRAFT] Llama Stack Specification'
|
title: '[DRAFT] Llama Stack Specification'
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||||
|
@ -3302,7 +3302,7 @@ paths:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
content:
|
content:
|
||||||
application/json:
|
text/event-stream:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||||
description: SSE-stream of these events.
|
description: SSE-stream of these events.
|
||||||
|
@ -3729,16 +3729,16 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
- name: Inference
|
|
||||||
- name: Datasets
|
|
||||||
- name: Evaluations
|
|
||||||
- name: Memory
|
|
||||||
- name: SyntheticDataGeneration
|
- name: SyntheticDataGeneration
|
||||||
- name: PostTraining
|
|
||||||
- name: RewardScoring
|
- name: RewardScoring
|
||||||
- name: BatchInference
|
- name: Datasets
|
||||||
- name: Telemetry
|
- name: Memory
|
||||||
- name: AgenticSystem
|
- name: AgenticSystem
|
||||||
|
- name: BatchInference
|
||||||
|
- name: PostTraining
|
||||||
|
- name: Evaluations
|
||||||
|
- name: Telemetry
|
||||||
|
- name: Inference
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
||||||
/>
|
/>
|
||||||
name: BatchChatCompletionRequest
|
name: BatchChatCompletionRequest
|
||||||
|
|
|
@ -35,7 +35,10 @@ from llama_toolchain.stack import LlamaStack
|
||||||
|
|
||||||
|
|
||||||
# TODO: this should be fixed in the generator itself so it reads appropriate annotations
|
# TODO: this should be fixed in the generator itself so it reads appropriate annotations
|
||||||
STREAMING_ENDPOINTS = ["/agentic_system/turn/create"]
|
STREAMING_ENDPOINTS = [
|
||||||
|
"/agentic_system/turn/create",
|
||||||
|
"/inference/chat_completion",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def patch_sse_stream_responses(spec: Specification):
|
def patch_sse_stream_responses(spec: Specification):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue