From b78e6675eae4a3ae37b8518c77059c37975baa0c Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 15 Jan 2025 05:58:09 -0800 Subject: [PATCH] llama-stack version alpha -> v1 --- docs/openapi_generator/pyopenapi/generator.py | 2 +- .../strong_typing/inspection.py | 1 - docs/resources/llama-stack-spec.html | 440 +++++++++--------- docs/resources/llama-stack-spec.yaml | 327 +++++++------ llama_stack/apis/common/content_types.py | 2 + llama_stack/apis/memory_banks/memory_banks.py | 21 +- llama_stack/apis/version.py | 2 +- llama_stack/distribution/stack.py | 2 - 8 files changed, 390 insertions(+), 407 deletions(-) diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 23465257a..25b08f071 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -537,7 +537,6 @@ class Generator: success_type_descriptions = { item: doc_string.short_description for item, doc_string in success_type_docstring.items() - if doc_string.short_description } else: # use return type as a single response type @@ -596,6 +595,7 @@ class Generator: ) responses.update(response_builder.build_response(response_options)) + assert len(responses.keys()) > 0, f"No responses found for {op.name}" if op.event_type is not None: builder = ContentBuilder(self.schema_builder) callbacks = { diff --git a/docs/openapi_generator/strong_typing/inspection.py b/docs/openapi_generator/strong_typing/inspection.py index c5e7899fa..41804f12c 100644 --- a/docs/openapi_generator/strong_typing/inspection.py +++ b/docs/openapi_generator/strong_typing/inspection.py @@ -342,7 +342,6 @@ def is_type_union(typ: object) -> bool: "True if the type annotation corresponds to a union type (e.g. `Union[T1,T2,T3]`)." typ = unwrap_annotated_type(typ) - if _is_union_like(typ): args = typing.get_args(typ) return len(args) > 2 or type(None) not in args diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index ad210a502..3f74a79cf 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -20,7 +20,7 @@ "openapi": "3.1.0", "info": { "title": "Llama Stack Specification", - "version": "alpha", + "version": "v1", "description": "This is the specification of the Llama Stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models." }, "servers": [ @@ -29,7 +29,7 @@ } ], "paths": { - "/alpha/datasetio/append-rows": { + "/v1/datasetio/append-rows": { "post": { "responses": { "200": { @@ -71,7 +71,7 @@ } } }, - "/alpha/batch-inference/chat-completion": { + "/v1/batch-inference/chat-completion": { "post": { "responses": { "200": { @@ -120,7 +120,7 @@ } } }, - "/alpha/batch-inference/completion": { + "/v1/batch-inference/completion": { "post": { "responses": { "200": { @@ -169,7 +169,7 @@ } } }, - "/alpha/post-training/job/cancel": { + "/v1/post-training/job/cancel": { "post": { "responses": { "200": { @@ -211,7 +211,7 @@ } } }, - "/alpha/inference/chat-completion": { + "/v1/inference/chat-completion": { "post": { "responses": { "200": { @@ -267,7 +267,7 @@ } } }, - "/alpha/inference/completion": { + "/v1/inference/completion": { "post": { "responses": { "200": { @@ -323,7 +323,7 @@ } } }, - "/alpha/agents/create": { + "/v1/agents/create": { "post": { "responses": { "200": { @@ -372,7 +372,7 @@ } } }, - "/alpha/agents/session/create": { + "/v1/agents/session/create": { "post": { "responses": { "200": { @@ -421,7 +421,7 @@ } } }, - "/alpha/agents/turn/create": { + "/v1/agents/turn/create": { "post": { "responses": { "200": { @@ -477,7 +477,7 @@ } } }, - "/alpha/agents/delete": { + "/v1/agents/delete": { "post": { "responses": { "200": { @@ -519,7 +519,7 @@ } } }, - "/alpha/agents/session/delete": { + "/v1/agents/session/delete": { "post": { "responses": { "200": { @@ -561,7 +561,7 @@ } } }, - "/alpha/inference/embeddings": { + "/v1/inference/embeddings": { "post": { "responses": { "200": { @@ -610,7 +610,7 @@ } } }, - "/alpha/eval/evaluate-rows": { + "/v1/eval/evaluate-rows": { "post": { "responses": { "200": { @@ -659,7 +659,7 @@ } } }, - "/alpha/agents/session/get": { + "/v1/agents/session/get": { "post": { "responses": { "200": { @@ -724,7 +724,7 @@ } } }, - "/alpha/agents/step/get": { + "/v1/agents/step/get": { "get": { "responses": { "200": { @@ -795,7 +795,7 @@ ] } }, - "/alpha/agents/turn/get": { + "/v1/agents/turn/get": { "get": { "responses": { "200": { @@ -858,7 +858,7 @@ ] } }, - "/alpha/datasets/get": { + "/v1/datasets/get": { "get": { "responses": { "200": { @@ -912,7 +912,7 @@ ] } }, - "/alpha/eval-tasks/get": { + "/v1/eval-tasks/get": { "get": { "responses": { "200": { @@ -966,7 +966,7 @@ ] } }, - "/alpha/memory-banks/get": { + "/v1/memory-banks/get": { "get": { "responses": { "200": { @@ -976,20 +976,7 @@ "schema": { "oneOf": [ { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] + "$ref": "#/components/schemas/MemoryBank" }, { "type": "null" @@ -1033,7 +1020,7 @@ ] } }, - "/alpha/models/get": { + "/v1/models/get": { "get": { "responses": { "200": { @@ -1087,7 +1074,7 @@ ] } }, - "/alpha/datasetio/get-rows-paginated": { + "/v1/datasetio/get-rows-paginated": { "get": { "responses": { "200": { @@ -1158,7 +1145,7 @@ ] } }, - "/alpha/scoring-functions/get": { + "/v1/scoring-functions/get": { "get": { "responses": { "200": { @@ -1212,7 +1199,7 @@ ] } }, - "/alpha/shields/get": { + "/v1/shields/get": { "get": { "responses": { "200": { @@ -1266,7 +1253,7 @@ ] } }, - "/alpha/telemetry/get-span-tree": { + "/v1/telemetry/get-span-tree": { "post": { "responses": { "200": { @@ -1334,7 +1321,7 @@ } } }, - "/alpha/tools/get": { + "/v1/tools/get": { "get": { "responses": { "200": { @@ -1381,7 +1368,7 @@ ] } }, - "/alpha/toolgroups/get": { + "/v1/toolgroups/get": { "get": { "responses": { "200": { @@ -1428,7 +1415,7 @@ ] } }, - "/alpha/post-training/job/artifacts": { + "/v1/post-training/job/artifacts": { "get": { "responses": { "200": { @@ -1482,7 +1469,7 @@ ] } }, - "/alpha/post-training/job/status": { + "/v1/post-training/job/status": { "get": { "responses": { "200": { @@ -1536,7 +1523,7 @@ ] } }, - "/alpha/post-training/jobs": { + "/v1/post-training/jobs": { "get": { "responses": { "200": { @@ -1575,7 +1562,7 @@ ] } }, - "/alpha/health": { + "/v1/health": { "get": { "responses": { "200": { @@ -1614,7 +1601,7 @@ ] } }, - "/alpha/memory/insert": { + "/v1/memory/insert": { "post": { "responses": { "200": { @@ -1656,7 +1643,7 @@ } } }, - "/alpha/tool-runtime/invoke": { + "/v1/tool-runtime/invoke": { "post": { "responses": { "200": { @@ -1706,7 +1693,7 @@ } } }, - "/alpha/eval/job/cancel": { + "/v1/eval/job/cancel": { "post": { "responses": { "200": { @@ -1748,7 +1735,7 @@ } } }, - "/alpha/eval/job/result": { + "/v1/eval/job/result": { "get": { "responses": { "200": { @@ -1803,7 +1790,7 @@ ] } }, - "/alpha/eval/job/status": { + "/v1/eval/job/status": { "get": { "responses": { "200": { @@ -1865,7 +1852,7 @@ ] } }, - "/alpha/datasets/list": { + "/v1/datasets/list": { "get": { "responses": { "200": { @@ -1904,7 +1891,7 @@ ] } }, - "/alpha/eval-tasks/list": { + "/v1/eval-tasks/list": { "get": { "responses": { "200": { @@ -1943,7 +1930,7 @@ ] } }, - "/alpha/memory-banks/list": { + "/v1/memory-banks/list": { "get": { "responses": { "200": { @@ -1951,20 +1938,7 @@ "content": { "application/jsonl": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] + "$ref": "#/components/schemas/MemoryBank" } } } @@ -1995,7 +1969,7 @@ ] } }, - "/alpha/models/list": { + "/v1/models/list": { "get": { "responses": { "200": { @@ -2034,7 +2008,7 @@ ] } }, - "/alpha/providers/list": { + "/v1/providers/list": { "get": { "responses": { "200": { @@ -2076,7 +2050,7 @@ ] } }, - "/alpha/routes/list": { + "/v1/routes/list": { "get": { "responses": { "200": { @@ -2121,7 +2095,7 @@ ] } }, - "/alpha/tool-runtime/list-tools": { + "/v1/tool-runtime/list-tools": { "post": { "responses": { "200": { @@ -2178,7 +2152,7 @@ } } }, - "/alpha/scoring-functions/list": { + "/v1/scoring-functions/list": { "get": { "responses": { "200": { @@ -2217,7 +2191,7 @@ ] } }, - "/alpha/shields/list": { + "/v1/shields/list": { "get": { "responses": { "200": { @@ -2256,7 +2230,7 @@ ] } }, - "/alpha/toolgroups/list": { + "/v1/toolgroups/list": { "get": { "responses": { "200": { @@ -2296,7 +2270,7 @@ ] } }, - "/alpha/tools/list": { + "/v1/tools/list": { "get": { "responses": { "200": { @@ -2344,7 +2318,7 @@ ] } }, - "/alpha/telemetry/log-event": { + "/v1/telemetry/log-event": { "post": { "responses": { "200": { @@ -2386,7 +2360,7 @@ } } }, - "/alpha/post-training/preference-optimize": { + "/v1/post-training/preference-optimize": { "post": { "responses": { "200": { @@ -2435,7 +2409,7 @@ } } }, - "/alpha/memory/query": { + "/v1/memory/query": { "post": { "responses": { "200": { @@ -2484,7 +2458,7 @@ } } }, - "/alpha/telemetry/query-spans": { + "/v1/telemetry/query-spans": { "post": { "responses": { "200": { @@ -2533,7 +2507,7 @@ } } }, - "/alpha/telemetry/query-traces": { + "/v1/telemetry/query-traces": { "post": { "responses": { "200": { @@ -2582,7 +2556,7 @@ } } }, - "/alpha/datasets/register": { + "/v1/datasets/register": { "post": { "responses": { "200": { @@ -2624,7 +2598,7 @@ } } }, - "/alpha/eval-tasks/register": { + "/v1/eval-tasks/register": { "post": { "responses": { "200": { @@ -2666,9 +2640,33 @@ } } }, - "/alpha/memory-banks/register": { + "/v1/memory-banks/register": { "post": { - "responses": {}, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBank" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBank" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBank" + }, + { + "$ref": "#/components/schemas/GraphMemoryBank" + } + ] + } + } + } + } + }, "tags": [ "MemoryBanks" ], @@ -2704,7 +2702,7 @@ } } }, - "/alpha/models/register": { + "/v1/models/register": { "post": { "responses": { "200": { @@ -2753,7 +2751,7 @@ } } }, - "/alpha/scoring-functions/register": { + "/v1/scoring-functions/register": { "post": { "responses": { "200": { @@ -2795,7 +2793,7 @@ } } }, - "/alpha/shields/register": { + "/v1/shields/register": { "post": { "responses": { "200": { @@ -2844,7 +2842,7 @@ } } }, - "/alpha/toolgroups/register": { + "/v1/toolgroups/register": { "post": { "responses": { "200": { @@ -2887,7 +2885,7 @@ } } }, - "/alpha/eval/run-eval": { + "/v1/eval/run-eval": { "post": { "responses": { "200": { @@ -2936,7 +2934,7 @@ } } }, - "/alpha/safety/run-shield": { + "/v1/safety/run-shield": { "post": { "responses": { "200": { @@ -2985,7 +2983,7 @@ } } }, - "/alpha/telemetry/save-spans-to-dataset": { + "/v1/telemetry/save-spans-to-dataset": { "post": { "responses": { "200": { @@ -3027,7 +3025,7 @@ } } }, - "/alpha/scoring/score": { + "/v1/scoring/score": { "post": { "responses": { "200": { @@ -3076,7 +3074,7 @@ } } }, - "/alpha/scoring/score-batch": { + "/v1/scoring/score-batch": { "post": { "responses": { "200": { @@ -3125,7 +3123,7 @@ } } }, - "/alpha/post-training/supervised-fine-tune": { + "/v1/post-training/supervised-fine-tune": { "post": { "responses": { "200": { @@ -3174,7 +3172,7 @@ } } }, - "/alpha/synthetic-data-generation/generate": { + "/v1/synthetic-data-generation/generate": { "post": { "responses": { "200": { @@ -3223,7 +3221,7 @@ } } }, - "/alpha/datasets/unregister": { + "/v1/datasets/unregister": { "post": { "responses": { "200": { @@ -3265,7 +3263,7 @@ } } }, - "/alpha/memory-banks/unregister": { + "/v1/memory-banks/unregister": { "post": { "responses": { "200": { @@ -3307,7 +3305,7 @@ } } }, - "/alpha/models/unregister": { + "/v1/models/unregister": { "post": { "responses": { "200": { @@ -3349,7 +3347,7 @@ } } }, - "/alpha/toolgroups/unregister": { + "/v1/toolgroups/unregister": { "post": { "responses": { "200": { @@ -3392,7 +3390,7 @@ } } }, - "/alpha/version": { + "/v1/version": { "get": { "responses": { "200": { @@ -3514,20 +3512,6 @@ "tool_calls" ] }, - "GreedySamplingStrategy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "greedy", - "default": "greedy" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, "ImageContentItem": { "type": "object", "properties": { @@ -3595,17 +3579,20 @@ "type": "object", "properties": { "strategy": { - "oneOf": [ - { - "$ref": "#/components/schemas/GreedySamplingStrategy" - }, - { - "$ref": "#/components/schemas/TopPSamplingStrategy" - }, - { - "$ref": "#/components/schemas/TopKSamplingStrategy" - } - ] + "$ref": "#/components/schemas/SamplingStrategy", + "default": "greedy" + }, + "temperature": { + "type": "number", + "default": 0.0 + }, + "top_p": { + "type": "number", + "default": 0.95 + }, + "top_k": { + "type": "integer", + "default": 0 }, "max_tokens": { "type": "integer", @@ -3621,6 +3608,14 @@ "strategy" ] }, + "SamplingStrategy": { + "type": "string", + "enum": [ + "greedy", + "top_p", + "top_k" + ] + }, "StopReason": { "type": "string", "enum": [ @@ -3874,45 +3869,6 @@ "content" ] }, - "TopKSamplingStrategy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "top_k", - "default": "top_k" - }, - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "top_k" - ] - }, - "TopPSamplingStrategy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "top_p", - "default": "top_p" - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number", - "default": 0.95 - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, "URL": { "type": "object", "properties": { @@ -4270,47 +4226,53 @@ "ContentDelta": { "oneOf": [ { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "text", - "default": "text" - }, - "text": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "type", - "text" - ] + "$ref": "#/components/schemas/TextDelta" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "image", - "default": "image" - }, - "data": { - "type": "string", - "contentEncoding": "base64" - } - }, - "additionalProperties": false, - "required": [ - "type", - "data" - ] + "$ref": "#/components/schemas/ImageDelta" }, { "$ref": "#/components/schemas/ToolCallDelta" } ] }, + "ImageDelta": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "image", + "default": "image" + }, + "data": { + "type": "string", + "contentEncoding": "base64" + } + }, + "additionalProperties": false, + "required": [ + "type", + "data" + ] + }, + "TextDelta": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "text", + "default": "text" + }, + "text": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text" + ] + }, "TokenLogProbs": { "type": "object", "properties": { @@ -5847,6 +5809,22 @@ "memory_bank_type" ] }, + "MemoryBank": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBank" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBank" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBank" + }, + { + "$ref": "#/components/schemas/GraphMemoryBank" + } + ] + }, "Session": { "type": "object", "properties": { @@ -5867,20 +5845,7 @@ "format": "date-time" }, "memory_bank": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] + "$ref": "#/components/schemas/MemoryBank" } }, "additionalProperties": false, @@ -7303,6 +7268,9 @@ "shuffle": { "type": "boolean" }, + "data_format": { + "$ref": "#/components/schemas/DatasetFormat" + }, "validation_dataset_id": { "type": "string" }, @@ -7319,7 +7287,15 @@ "required": [ "dataset_id", "batch_size", - "shuffle" + "shuffle", + "data_format" + ] + }, + "DatasetFormat": { + "type": "string", + "enum": [ + "instruct", + "dialog" ] }, "EfficiencyConfig": { @@ -8869,6 +8845,10 @@ "name": "Dataset", "description": "" }, + { + "name": "DatasetFormat", + "description": "" + }, { "name": "DatasetIO" }, @@ -8929,10 +8909,6 @@ "name": "GraphMemoryBankParams", "description": "" }, - { - "name": "GreedySamplingStrategy", - "description": "" - }, { "name": "HealthInfo", "description": "" @@ -8941,6 +8917,10 @@ "name": "ImageContentItem", "description": "" }, + { + "name": "ImageDelta", + "description": "" + }, { "name": "Inference" }, @@ -9018,6 +8998,10 @@ { "name": "Memory" }, + { + "name": "MemoryBank", + "description": "" + }, { "name": "MemoryBankDocument", "description": "" @@ -9182,6 +9166,10 @@ "name": "SamplingParams", "description": "" }, + { + "name": "SamplingStrategy", + "description": "" + }, { "name": "SaveSpansToDatasetRequest", "description": "" @@ -9285,6 +9273,10 @@ "name": "TextContentItem", "description": "" }, + { + "name": "TextDelta", + "description": "" + }, { "name": "TokenLogProbs", "description": "" @@ -9359,14 +9351,6 @@ { "name": "ToolRuntime" }, - { - "name": "TopKSamplingStrategy", - "description": "" - }, - { - "name": "TopPSamplingStrategy", - "description": "" - }, { "name": "Trace", "description": "" @@ -9494,6 +9478,7 @@ "DPOAlignmentConfig", "DataConfig", "Dataset", + "DatasetFormat", "DeleteAgentsRequest", "DeleteAgentsSessionRequest", "EfficiencyConfig", @@ -9506,9 +9491,9 @@ "GetSpanTreeRequest", "GraphMemoryBank", "GraphMemoryBankParams", - "GreedySamplingStrategy", "HealthInfo", "ImageContentItem", + "ImageDelta", "InferenceStep", "InsertDocumentsRequest", "InterleavedContent", @@ -9526,6 +9511,7 @@ "LogEventRequest", "LogSeverity", "LoraFinetuningConfig", + "MemoryBank", "MemoryBankDocument", "MemoryRetrievalStep", "Message", @@ -9564,6 +9550,7 @@ "RunShieldResponse", "SafetyViolation", "SamplingParams", + "SamplingStrategy", "SaveSpansToDatasetRequest", "ScoreBatchRequest", "ScoreBatchResponse", @@ -9586,6 +9573,7 @@ "SyntheticDataGenerationResponse", "SystemMessage", "TextContentItem", + "TextDelta", "TokenLogProbs", "Tool", "ToolCall", @@ -9603,8 +9591,6 @@ "ToolPromptFormat", "ToolResponse", "ToolResponseMessage", - "TopKSamplingStrategy", - "TopPSamplingStrategy", "Trace", "TrainingConfig", "Turn", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 8c885b7e5..2afb8e375 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -570,31 +570,8 @@ components: type: object ContentDelta: oneOf: - - additionalProperties: false - properties: - text: - type: string - type: - const: text - default: text - type: string - required: - - type - - text - type: object - - additionalProperties: false - properties: - data: - contentEncoding: base64 - type: string - type: - const: image - default: image - type: string - required: - - type - - data - type: object + - $ref: '#/components/schemas/TextDelta' + - $ref: '#/components/schemas/ImageDelta' - $ref: '#/components/schemas/ToolCallDelta' CreateAgentRequest: additionalProperties: false @@ -680,6 +657,8 @@ components: properties: batch_size: type: integer + data_format: + $ref: '#/components/schemas/DatasetFormat' dataset_id: type: string packed: @@ -696,6 +675,7 @@ components: - dataset_id - batch_size - shuffle + - data_format type: object Dataset: additionalProperties: false @@ -735,6 +715,11 @@ components: - url - metadata type: object + DatasetFormat: + enum: + - instruct + - dialog + type: string DeleteAgentsRequest: additionalProperties: false properties: @@ -937,16 +922,6 @@ components: required: - memory_bank_type type: object - GreedySamplingStrategy: - additionalProperties: false - properties: - type: - const: greedy - default: greedy - type: string - required: - - type - type: object HealthInfo: additionalProperties: false properties: @@ -970,6 +945,20 @@ components: required: - type type: object + ImageDelta: + additionalProperties: false + properties: + data: + contentEncoding: base64 + type: string + type: + const: image + default: image + type: string + required: + - type + - data + type: object InferenceStep: additionalProperties: false properties: @@ -1219,6 +1208,12 @@ components: - rank - alpha type: object + MemoryBank: + oneOf: + - $ref: '#/components/schemas/VectorMemoryBank' + - $ref: '#/components/schemas/KeyValueMemoryBank' + - $ref: '#/components/schemas/KeywordMemoryBank' + - $ref: '#/components/schemas/GraphMemoryBank' MemoryBankDocument: additionalProperties: false properties: @@ -2074,13 +2069,26 @@ components: default: 1.0 type: number strategy: - oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' + $ref: '#/components/schemas/SamplingStrategy' + default: greedy + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number required: - strategy type: object + SamplingStrategy: + enum: + - greedy + - top_p + - top_k + type: string SaveSpansToDatasetRequest: additionalProperties: false properties: @@ -2245,11 +2253,7 @@ components: additionalProperties: false properties: memory_bank: - oneOf: - - $ref: '#/components/schemas/VectorMemoryBank' - - $ref: '#/components/schemas/KeyValueMemoryBank' - - $ref: '#/components/schemas/KeywordMemoryBank' - - $ref: '#/components/schemas/GraphMemoryBank' + $ref: '#/components/schemas/MemoryBank' session_id: type: string session_name: @@ -2585,6 +2589,19 @@ components: - type - text type: object + TextDelta: + additionalProperties: false + properties: + text: + type: string + type: + const: text + default: text + type: string + required: + - type + - text + type: object TokenLogProbs: additionalProperties: false properties: @@ -2928,34 +2945,6 @@ components: - tool_name - content type: object - TopKSamplingStrategy: - additionalProperties: false - properties: - top_k: - type: integer - type: - const: top_k - default: top_k - type: string - required: - - type - - top_k - type: object - TopPSamplingStrategy: - additionalProperties: false - properties: - temperature: - type: number - top_p: - default: 0.95 - type: number - type: - const: top_p - default: top_p - type: string - required: - - type - type: object Trace: additionalProperties: false properties: @@ -3223,11 +3212,11 @@ info: \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models." title: Llama Stack Specification - version: alpha + version: v1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema openapi: 3.1.0 paths: - /alpha/agents/create: + /v1/agents/create: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3259,7 +3248,7 @@ paths: description: OK tags: - Agents - /alpha/agents/delete: + /v1/agents/delete: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3287,7 +3276,7 @@ paths: description: OK tags: - Agents - /alpha/agents/session/create: + /v1/agents/session/create: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3319,7 +3308,7 @@ paths: description: OK tags: - Agents - /alpha/agents/session/delete: + /v1/agents/session/delete: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3347,7 +3336,7 @@ paths: description: OK tags: - Agents - /alpha/agents/session/get: + /v1/agents/session/get: post: parameters: - in: query @@ -3389,7 +3378,7 @@ paths: description: OK tags: - Agents - /alpha/agents/step/get: + /v1/agents/step/get: get: parameters: - in: query @@ -3435,7 +3424,7 @@ paths: description: OK tags: - Agents - /alpha/agents/turn/create: + /v1/agents/turn/create: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3470,7 +3459,7 @@ paths: streamed agent turn completion response. tags: - Agents - /alpha/agents/turn/get: + /v1/agents/turn/get: get: parameters: - in: query @@ -3511,7 +3500,7 @@ paths: description: OK tags: - Agents - /alpha/batch-inference/chat-completion: + /v1/batch-inference/chat-completion: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3543,7 +3532,7 @@ paths: description: OK tags: - BatchInference (Coming Soon) - /alpha/batch-inference/completion: + /v1/batch-inference/completion: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3575,7 +3564,7 @@ paths: description: OK tags: - BatchInference (Coming Soon) - /alpha/datasetio/append-rows: + /v1/datasetio/append-rows: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3603,7 +3592,7 @@ paths: description: OK tags: - DatasetIO - /alpha/datasetio/get-rows-paginated: + /v1/datasetio/get-rows-paginated: get: parameters: - in: query @@ -3649,7 +3638,7 @@ paths: description: OK tags: - DatasetIO - /alpha/datasets/get: + /v1/datasets/get: get: parameters: - in: query @@ -3682,7 +3671,7 @@ paths: description: OK tags: - Datasets - /alpha/datasets/list: + /v1/datasets/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3708,7 +3697,7 @@ paths: description: OK tags: - Datasets - /alpha/datasets/register: + /v1/datasets/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3736,7 +3725,7 @@ paths: description: OK tags: - Datasets - /alpha/datasets/unregister: + /v1/datasets/unregister: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3764,7 +3753,7 @@ paths: description: OK tags: - Datasets - /alpha/eval-tasks/get: + /v1/eval-tasks/get: get: parameters: - in: query @@ -3797,7 +3786,7 @@ paths: description: OK tags: - EvalTasks - /alpha/eval-tasks/list: + /v1/eval-tasks/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3823,7 +3812,7 @@ paths: description: OK tags: - EvalTasks - /alpha/eval-tasks/register: + /v1/eval-tasks/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3851,7 +3840,7 @@ paths: description: OK tags: - EvalTasks - /alpha/eval/evaluate-rows: + /v1/eval/evaluate-rows: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3883,7 +3872,7 @@ paths: description: OK tags: - Eval - /alpha/eval/job/cancel: + /v1/eval/job/cancel: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3911,7 +3900,7 @@ paths: description: OK tags: - Eval - /alpha/eval/job/result: + /v1/eval/job/result: get: parameters: - in: query @@ -3947,7 +3936,7 @@ paths: description: OK tags: - Eval - /alpha/eval/job/status: + /v1/eval/job/status: get: parameters: - in: query @@ -3985,7 +3974,7 @@ paths: description: OK tags: - Eval - /alpha/eval/run-eval: + /v1/eval/run-eval: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4017,7 +4006,7 @@ paths: description: OK tags: - Eval - /alpha/health: + /v1/health: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4043,7 +4032,7 @@ paths: description: OK tags: - Inspect - /alpha/inference/chat-completion: + /v1/inference/chat-completion: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4077,7 +4066,7 @@ paths: description: Chat completion response. **OR** SSE-stream of these events. tags: - Inference - /alpha/inference/completion: + /v1/inference/completion: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4111,7 +4100,7 @@ paths: description: Completion response. **OR** streamed completion response. tags: - Inference - /alpha/inference/embeddings: + /v1/inference/embeddings: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4143,7 +4132,7 @@ paths: description: OK tags: - Inference - /alpha/memory-banks/get: + /v1/memory-banks/get: get: parameters: - in: query @@ -4171,16 +4160,12 @@ paths: application/json: schema: oneOf: - - oneOf: - - $ref: '#/components/schemas/VectorMemoryBank' - - $ref: '#/components/schemas/KeyValueMemoryBank' - - $ref: '#/components/schemas/KeywordMemoryBank' - - $ref: '#/components/schemas/GraphMemoryBank' + - $ref: '#/components/schemas/MemoryBank' - type: 'null' description: OK tags: - MemoryBanks - /alpha/memory-banks/list: + /v1/memory-banks/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4202,15 +4187,11 @@ paths: content: application/jsonl: schema: - oneOf: - - $ref: '#/components/schemas/VectorMemoryBank' - - $ref: '#/components/schemas/KeyValueMemoryBank' - - $ref: '#/components/schemas/KeywordMemoryBank' - - $ref: '#/components/schemas/GraphMemoryBank' + $ref: '#/components/schemas/MemoryBank' description: OK tags: - MemoryBanks - /alpha/memory-banks/register: + /v1/memory-banks/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4233,10 +4214,20 @@ paths: schema: $ref: '#/components/schemas/RegisterMemoryBankRequest' required: true - responses: {} + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/VectorMemoryBank' + - $ref: '#/components/schemas/KeyValueMemoryBank' + - $ref: '#/components/schemas/KeywordMemoryBank' + - $ref: '#/components/schemas/GraphMemoryBank' + description: '' tags: - MemoryBanks - /alpha/memory-banks/unregister: + /v1/memory-banks/unregister: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4264,7 +4255,7 @@ paths: description: OK tags: - MemoryBanks - /alpha/memory/insert: + /v1/memory/insert: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4292,7 +4283,7 @@ paths: description: OK tags: - Memory - /alpha/memory/query: + /v1/memory/query: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4324,7 +4315,7 @@ paths: description: OK tags: - Memory - /alpha/models/get: + /v1/models/get: get: parameters: - in: query @@ -4357,7 +4348,7 @@ paths: description: OK tags: - Models - /alpha/models/list: + /v1/models/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4383,7 +4374,7 @@ paths: description: OK tags: - Models - /alpha/models/register: + /v1/models/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4415,7 +4406,7 @@ paths: description: OK tags: - Models - /alpha/models/unregister: + /v1/models/unregister: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4443,7 +4434,7 @@ paths: description: OK tags: - Models - /alpha/post-training/job/artifacts: + /v1/post-training/job/artifacts: get: parameters: - in: query @@ -4476,7 +4467,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/post-training/job/cancel: + /v1/post-training/job/cancel: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4504,7 +4495,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/post-training/job/status: + /v1/post-training/job/status: get: parameters: - in: query @@ -4537,7 +4528,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/post-training/jobs: + /v1/post-training/jobs: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4563,7 +4554,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/post-training/preference-optimize: + /v1/post-training/preference-optimize: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4595,7 +4586,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/post-training/supervised-fine-tune: + /v1/post-training/supervised-fine-tune: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4627,7 +4618,7 @@ paths: description: OK tags: - PostTraining (Coming Soon) - /alpha/providers/list: + /v1/providers/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4655,7 +4646,7 @@ paths: description: OK tags: - Inspect - /alpha/routes/list: + /v1/routes/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4685,7 +4676,7 @@ paths: description: OK tags: - Inspect - /alpha/safety/run-shield: + /v1/safety/run-shield: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4717,7 +4708,7 @@ paths: description: OK tags: - Safety - /alpha/scoring-functions/get: + /v1/scoring-functions/get: get: parameters: - in: query @@ -4750,7 +4741,7 @@ paths: description: OK tags: - ScoringFunctions - /alpha/scoring-functions/list: + /v1/scoring-functions/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4776,7 +4767,7 @@ paths: description: OK tags: - ScoringFunctions - /alpha/scoring-functions/register: + /v1/scoring-functions/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4804,7 +4795,7 @@ paths: description: OK tags: - ScoringFunctions - /alpha/scoring/score: + /v1/scoring/score: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4836,7 +4827,7 @@ paths: description: OK tags: - Scoring - /alpha/scoring/score-batch: + /v1/scoring/score-batch: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4868,7 +4859,7 @@ paths: description: OK tags: - Scoring - /alpha/shields/get: + /v1/shields/get: get: parameters: - in: query @@ -4901,7 +4892,7 @@ paths: description: OK tags: - Shields - /alpha/shields/list: + /v1/shields/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4927,7 +4918,7 @@ paths: description: OK tags: - Shields - /alpha/shields/register: + /v1/shields/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4959,7 +4950,7 @@ paths: description: OK tags: - Shields - /alpha/synthetic-data-generation/generate: + /v1/synthetic-data-generation/generate: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4991,7 +4982,7 @@ paths: description: OK tags: - SyntheticDataGeneration (Coming Soon) - /alpha/telemetry/get-span-tree: + /v1/telemetry/get-span-tree: post: parameters: - in: query @@ -5035,7 +5026,7 @@ paths: description: OK tags: - Telemetry - /alpha/telemetry/log-event: + /v1/telemetry/log-event: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5063,7 +5054,7 @@ paths: description: OK tags: - Telemetry - /alpha/telemetry/query-spans: + /v1/telemetry/query-spans: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5095,7 +5086,7 @@ paths: description: OK tags: - Telemetry - /alpha/telemetry/query-traces: + /v1/telemetry/query-traces: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5127,7 +5118,7 @@ paths: description: OK tags: - Telemetry - /alpha/telemetry/save-spans-to-dataset: + /v1/telemetry/save-spans-to-dataset: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5155,7 +5146,7 @@ paths: description: OK tags: - Telemetry - /alpha/tool-runtime/invoke: + /v1/tool-runtime/invoke: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5188,7 +5179,7 @@ paths: summary: Run a tool with the given arguments tags: - ToolRuntime - /alpha/tool-runtime/list-tools: + /v1/tool-runtime/list-tools: post: parameters: - in: query @@ -5225,7 +5216,7 @@ paths: description: OK tags: - ToolRuntime - /alpha/toolgroups/get: + /v1/toolgroups/get: get: parameters: - in: query @@ -5256,7 +5247,7 @@ paths: description: OK tags: - ToolGroups - /alpha/toolgroups/list: + /v1/toolgroups/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5283,7 +5274,7 @@ paths: summary: List tool groups with optional provider tags: - ToolGroups - /alpha/toolgroups/register: + /v1/toolgroups/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5312,7 +5303,7 @@ paths: summary: Register a tool group tags: - ToolGroups - /alpha/toolgroups/unregister: + /v1/toolgroups/unregister: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5341,7 +5332,7 @@ paths: summary: Unregister a tool group tags: - ToolGroups - /alpha/tools/get: + /v1/tools/get: get: parameters: - in: query @@ -5372,7 +5363,7 @@ paths: description: OK tags: - ToolGroups - /alpha/tools/list: + /v1/tools/list: get: parameters: - in: query @@ -5404,7 +5395,7 @@ paths: summary: List tools with optional tool group tags: - ToolGroups - /alpha/version: + /v1/version: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5573,6 +5564,8 @@ tags: name: DataConfig - description: name: Dataset +- description: + name: DatasetFormat - name: DatasetIO - name: Datasets - description: name: GraphMemoryBankParams -- description: - name: GreedySamplingStrategy - description: name: HealthInfo - description: name: ImageContentItem +- description: + name: ImageDelta - name: Inference - description: name: InferenceStep @@ -5670,6 +5662,8 @@ tags: /> name: LoraFinetuningConfig - name: Memory +- description: + name: MemoryBank - description: name: MemoryBankDocument @@ -5781,6 +5775,9 @@ tags: name: SafetyViolation - description: name: SamplingParams +- description: + name: SamplingStrategy - description: name: SaveSpansToDatasetRequest @@ -5849,6 +5846,8 @@ tags: - description: name: TextContentItem +- description: + name: TextDelta - description: name: TokenLogProbs - description: @@ -5899,12 +5898,6 @@ tags: /> name: ToolResponseMessage - name: ToolRuntime -- description: - name: TopKSamplingStrategy -- description: - name: TopPSamplingStrategy - description: name: Trace - description: @@ -6009,6 +6002,7 @@ x-tagGroups: - DPOAlignmentConfig - DataConfig - Dataset + - DatasetFormat - DeleteAgentsRequest - DeleteAgentsSessionRequest - EfficiencyConfig @@ -6021,9 +6015,9 @@ x-tagGroups: - GetSpanTreeRequest - GraphMemoryBank - GraphMemoryBankParams - - GreedySamplingStrategy - HealthInfo - ImageContentItem + - ImageDelta - InferenceStep - InsertDocumentsRequest - InterleavedContent @@ -6041,6 +6035,7 @@ x-tagGroups: - LogEventRequest - LogSeverity - LoraFinetuningConfig + - MemoryBank - MemoryBankDocument - MemoryRetrievalStep - Message @@ -6079,6 +6074,7 @@ x-tagGroups: - RunShieldResponse - SafetyViolation - SamplingParams + - SamplingStrategy - SaveSpansToDatasetRequest - ScoreBatchRequest - ScoreBatchResponse @@ -6101,6 +6097,7 @@ x-tagGroups: - SyntheticDataGenerationResponse - SystemMessage - TextContentItem + - TextDelta - TokenLogProbs - Tool - ToolCall @@ -6118,8 +6115,6 @@ x-tagGroups: - ToolPromptFormat - ToolResponse - ToolResponseMessage - - TopKSamplingStrategy - - TopPSamplingStrategy - Trace - TrainingConfig - Turn diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py index 3b61fa243..b845d09dd 100644 --- a/llama_stack/apis/common/content_types.py +++ b/llama_stack/apis/common/content_types.py @@ -64,11 +64,13 @@ InterleavedContent = register_schema( ) +@json_schema_type class TextDelta(BaseModel): type: Literal["text"] = "text" text: str +@json_schema_type class ImageDelta(BaseModel): type: Literal["image"] = "image" data: bytes diff --git a/llama_stack/apis/memory_banks/memory_banks.py b/llama_stack/apis/memory_banks/memory_banks.py index b037dfa66..21569beff 100644 --- a/llama_stack/apis/memory_banks/memory_banks.py +++ b/llama_stack/apis/memory_banks/memory_banks.py @@ -15,7 +15,7 @@ from typing import ( Union, ) -from llama_models.schema_utils import json_schema_type, webmethod +from llama_models.schema_utils import json_schema_type, register_schema, webmethod from pydantic import BaseModel, Field @@ -113,15 +113,18 @@ class GraphMemoryBank(MemoryBankResourceMixin): memory_bank_type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value -MemoryBank = Annotated[ - Union[ - VectorMemoryBank, - KeyValueMemoryBank, - KeywordMemoryBank, - GraphMemoryBank, +MemoryBank = register_schema( + Annotated[ + Union[ + VectorMemoryBank, + KeyValueMemoryBank, + KeywordMemoryBank, + GraphMemoryBank, + ], + Field(discriminator="memory_bank_type"), ], - Field(discriminator="memory_bank_type"), -] + name="MemoryBank", +) class MemoryBankInput(BaseModel): diff --git a/llama_stack/apis/version.py b/llama_stack/apis/version.py index f178712ba..53ad6a854 100644 --- a/llama_stack/apis/version.py +++ b/llama_stack/apis/version.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -LLAMA_STACK_API_VERSION = "alpha" +LLAMA_STACK_API_VERSION = "v1" diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index acbd42fa9..e3edf1e16 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -40,8 +40,6 @@ from llama_stack.providers.datatypes import Api log = logging.getLogger(__name__) -LLAMA_STACK_API_VERSION = "alpha" - class LlamaStack( MemoryBanks,