diff --git a/llama_toolchain/inference/api/api.py b/llama_toolchain/inference/api/api.py index 2bf225be4..8887d312f 100644 --- a/llama_toolchain/inference/api/api.py +++ b/llama_toolchain/inference/api/api.py @@ -85,6 +85,8 @@ class CompletionRequest(BaseModel): @json_schema_type class CompletionResponse(BaseModel): + """Completion response.""" + completion_message: CompletionMessage logprobs: Optional[List[TokenLogProbs]] = None @@ -108,6 +110,8 @@ class BatchCompletionRequest(BaseModel): @json_schema_type class BatchCompletionResponse(BaseModel): + """Batch completion response.""" + completion_message_batch: List[CompletionMessage] @@ -137,6 +141,8 @@ class ChatCompletionResponseStreamChunk(BaseModel): @json_schema_type class ChatCompletionResponse(BaseModel): + """Chat completion response.""" + completion_message: CompletionMessage logprobs: Optional[List[TokenLogProbs]] = None diff --git a/llama_toolchain/telemetry/api/api.py b/llama_toolchain/telemetry/api/api.py index 100836b46..2546c1ede 100644 --- a/llama_toolchain/telemetry/api/api.py +++ b/llama_toolchain/telemetry/api/api.py @@ -125,7 +125,7 @@ Event = Annotated[ class Telemetry(Protocol): @webmethod(route="/telemetry/log_event") - async def log_event(self, event: Event): ... + async def log_event(self, event: Event) -> None: ... @webmethod(route="/telemetry/get_trace", method="GET") async def get_trace(self, trace_id: str) -> Trace: ... diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html index 688e78fa9..6e7fe287f 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-11 15:30:11.688505" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-11 16:05:23.016090" }, "servers": [ { @@ -139,11 +139,18 @@ "post": { "responses": { "200": { - "description": "SSE-stream of these events.", + "description": "Chat completion response. **OR** SSE-stream of these events.", "content": { "text/event-stream": { "schema": { - "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" + "oneOf": [ + { + "$ref": "#/components/schemas/ChatCompletionResponse" + }, + { + "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" + } + ] } } } @@ -169,11 +176,18 @@ "post": { "responses": { "200": { - "description": "streamed completion response.", + "description": "Completion response. **OR** streamed completion response.", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CompletionResponseStreamChunk" + "oneOf": [ + { + "$ref": "#/components/schemas/CompletionResponse" + }, + { + "$ref": "#/components/schemas/CompletionResponseStreamChunk" + } + ] } } } @@ -308,36 +322,6 @@ } } }, - "/experiments/create": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateExperimentRequest" - } - } - }, - "required": true - } - } - }, "/memory_banks/create": { "post": { "responses": { @@ -368,36 +352,6 @@ } } }, - "/experiments/create_run": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Run" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateRunRequest" - } - } - }, - "required": true - } - } - }, "/agentic_system/delete": { "post": { "responses": { @@ -769,35 +723,6 @@ ] } }, - "/artifacts/get": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "artifact_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/datasets/get": { "get": { "responses": { @@ -973,74 +898,6 @@ "parameters": [] } }, - "/experiments/get": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "experiment_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/logging/get_logs": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Log" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "query", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetLogsRequest" - } - } - }, - "required": true - } - } - }, "/memory_banks/get": { "get": { "responses": { @@ -1077,15 +934,15 @@ ] } }, - "/runs/metrics": { + "/telemetry/get_trace": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/Metric" + "$ref": "#/components/schemas/Trace" } } } @@ -1096,7 +953,7 @@ ], "parameters": [ { - "name": "run_id", + "name": "trace_id", "in": "query", "required": true, "schema": { @@ -1236,56 +1093,6 @@ } } }, - "/experiments/artifacts/get": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListArtifactsRequest" - } - } - }, - "required": true - } - } - }, - "/experiments/list": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [] - } - }, "/memory_banks/list": { "get": { "responses": { @@ -1306,7 +1113,7 @@ "parameters": [] } }, - "/logging/log_messages": { + "/telemetry/log_event": { "post": { "responses": { "200": { @@ -1321,30 +1128,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LogMessagesRequest" - } - } - }, - "required": true - } - } - }, - "/runs/log_metrics": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogMetricsRequest" + "$ref": "#/components/schemas/LogEventRequest" } } }, @@ -1554,96 +1338,6 @@ "required": true } } - }, - "/experiments/update": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UpdateExperimentRequest" - } - } - }, - "required": true - } - } - }, - "/runs/update": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Run" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UpdateRunRequest" - } - } - }, - "required": true - } - } - }, - "/experiments/artifacts/upload": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UploadArtifactRequest" - } - } - }, - "required": true - } - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -2213,6 +1907,25 @@ "messages" ] }, + "ChatCompletionResponse": { + "type": "object", + "properties": { + "completion_message": { + "$ref": "#/components/schemas/CompletionMessage" + }, + "logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TokenLogProbs" + } + } + }, + "additionalProperties": false, + "required": [ + "completion_message" + ], + "title": "Chat completion response." + }, "ChatCompletionResponseEvent": { "type": "object", "properties": { @@ -2355,6 +2068,25 @@ "content" ] }, + "CompletionResponse": { + "type": "object", + "properties": { + "completion_message": { + "$ref": "#/components/schemas/CompletionMessage" + }, + "logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TokenLogProbs" + } + } + }, + "additionalProperties": false, + "required": [ + "completion_message" + ], + "title": "Completion response." + }, "CompletionResponseStreamChunk": { "type": "object", "properties": { @@ -3630,108 +3362,6 @@ "dataset" ] }, - "CreateExperimentRequest": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "Experiment": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "status": { - "$ref": "#/components/schemas/ExperimentStatus" - }, - "created_at": { - "type": "string", - "format": "date-time" - }, - "updated_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "name", - "status", - "created_at", - "updated_at", - "metadata" - ] - }, - "ExperimentStatus": { - "type": "string", - "enum": [ - "not_started", - "running", - "completed", - "failed" - ] - }, "CreateMemoryBankRequest": { "type": "object", "properties": { @@ -3902,98 +3532,6 @@ "config" ] }, - "CreateRunRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" - ] - }, - "Run": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "experiment_id": { - "type": "string" - }, - "status": { - "type": "string" - }, - "started_at": { - "type": "string", - "format": "date-time" - }, - "ended_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "experiment_id", - "status", - "started_at", - "metadata" - ] - }, "DeleteAgenticSystemRequest": { "type": "object", "properties": { @@ -4251,74 +3789,6 @@ "step" ] }, - "Artifact": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "type": { - "$ref": "#/components/schemas/ArtifactType" - }, - "size": { - "type": "integer" - }, - "created_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "name", - "type", - "size", - "created_at", - "metadata" - ] - }, - "ArtifactType": { - "type": "string", - "enum": [ - "model", - "dataset", - "checkpoint", - "plot", - "metric", - "config", - "code", - "other" - ] - }, "GetDocumentsRequest": { "type": "object", "properties": { @@ -4430,120 +3900,29 @@ "job_uuid" ] }, - "GetLogsRequest": { + "Trace": { "type": "object", "properties": { - "filters": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false - }, - "Log": { - "type": "object", - "properties": { - "message": { + "trace_id": { "type": "string" }, - "level": { + "root_span_id": { "type": "string" }, - "timestamp": { + "start_time": { "type": "string", "format": "date-time" }, - "additional_info": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } + "end_time": { + "type": "string", + "format": "date-time" } }, "additionalProperties": false, "required": [ - "message", - "level", - "timestamp", - "additional_info" - ] - }, - "Metric": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "value": { - "oneOf": [ - { - "type": "number" - }, - { - "type": "integer" - }, - { - "type": "string" - }, - { - "type": "boolean" - } - ] - }, - "timestamp": { - "type": "string", - "format": "date-time" - }, - "run_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "timestamp", - "run_id" + "trace_id", + "root_span_id", + "start_time" ] }, "Checkpoint": { @@ -4693,53 +4072,272 @@ "documents" ] }, - "ListArtifactsRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" ] }, - "LogMessagesRequest": { + "MetricEvent": { "type": "object", "properties": { - "logs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Log" + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] } }, - "run_id": { + "type": { + "type": "string", + "const": "metric" + }, + "metric": { + "type": "string" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ] + }, + "unit": { "type": "string" } }, "additionalProperties": false, "required": [ - "logs" + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" ] }, - "LogMetricsRequest": { + "SpanEndPayload": { "type": "object", "properties": { - "run_id": { - "type": "string" + "type": { + "type": "string", + "const": "span_end" }, - "metrics": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Metric" - } + "status": { + "$ref": "#/components/schemas/SpanStatus" } }, "additionalProperties": false, "required": [ - "run_id", - "metrics" + "type", + "status" + ] + }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "span_start" + }, + "name": { + "type": "string" + }, + "parent_span_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ] + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "type": { + "type": "string", + "const": "structured_log" + }, + "payload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ] + }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "type": { + "type": "string", + "const": "unstructured_log" + }, + "message": { + "type": "string" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ] + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "event" ] }, "DPOAlignmentConfig": { @@ -5533,140 +5131,6 @@ "bank_id", "documents" ] - }, - "UpdateExperimentRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "status": { - "$ref": "#/components/schemas/ExperimentStatus" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" - ] - }, - "UpdateRunRequest": { - "type": "object", - "properties": { - "run_id": { - "type": "string" - }, - "status": { - "type": "string" - }, - "ended_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "run_id" - ] - }, - "UploadArtifactRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "artifact_type": { - "type": "string" - }, - "content": { - "type": "string", - "contentEncoding": "base64" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id", - "name", - "artifact_type", - "content" - ] } }, "responses": {} @@ -5678,38 +5142,38 @@ ], "tags": [ { - "name": "BatchInference" - }, - { - "name": "AgenticSystem" - }, - { - "name": "Memory" - }, - { - "name": "Inference" - }, - { - "name": "RewardScoring" - }, - { - "name": "Telemetry" - }, - { - "name": "Safety" - }, - { - "name": "Evaluations" + "name": "SyntheticDataGeneration" }, { "name": "Datasets" }, { - "name": "SyntheticDataGeneration" + "name": "Evaluations" + }, + { + "name": "Safety" + }, + { + "name": "Inference" + }, + { + "name": "Telemetry" }, { "name": "PostTraining" }, + { + "name": "Memory" + }, + { + "name": "RewardScoring" + }, + { + "name": "BatchInference" + }, + { + "name": "AgenticSystem" + }, { "name": "BuiltinTool", "description": "" @@ -5790,6 +5254,10 @@ "name": "ChatCompletionRequest", "description": "" }, + { + "name": "ChatCompletionResponse", + "description": "Chat completion response.\n\n" + }, { "name": "ChatCompletionResponseEvent", "description": "Chat completion response event.\n\n" @@ -5818,6 +5286,10 @@ "name": "CompletionRequest", "description": "" }, + { + "name": "CompletionResponse", + "description": "Completion response.\n\n" + }, { "name": "CompletionResponseStreamChunk", "description": "streamed completion response.\n\n" @@ -5962,18 +5434,6 @@ "name": "CreateDatasetRequest", "description": "" }, - { - "name": "CreateExperimentRequest", - "description": "" - }, - { - "name": "Experiment", - "description": "" - }, - { - "name": "ExperimentStatus", - "description": "" - }, { "name": "CreateMemoryBankRequest", "description": "" @@ -5982,14 +5442,6 @@ "name": "MemoryBank", "description": "" }, - { - "name": "CreateRunRequest", - "description": "" - }, - { - "name": "Run", - "description": "" - }, { "name": "DeleteAgenticSystemRequest", "description": "" @@ -6046,14 +5498,6 @@ "name": "AgenticSystemStepResponse", "description": "" }, - { - "name": "Artifact", - "description": "" - }, - { - "name": "ArtifactType", - "description": "" - }, { "name": "GetDocumentsRequest", "description": "" @@ -6075,16 +5519,8 @@ "description": "" }, { - "name": "GetLogsRequest", - "description": "" - }, - { - "name": "Log", - "description": "" - }, - { - "name": "Metric", - "description": "" + "name": "Trace", + "description": "" }, { "name": "Checkpoint", @@ -6115,16 +5551,36 @@ "description": "" }, { - "name": "ListArtifactsRequest", - "description": "" + "name": "LogSeverity", + "description": "" }, { - "name": "LogMessagesRequest", - "description": "" + "name": "MetricEvent", + "description": "" }, { - "name": "LogMetricsRequest", - "description": "" + "name": "SpanEndPayload", + "description": "" + }, + { + "name": "SpanStartPayload", + "description": "" + }, + { + "name": "SpanStatus", + "description": "" + }, + { + "name": "StructuredLogEvent", + "description": "" + }, + { + "name": "UnstructuredLogEvent", + "description": "" + }, + { + "name": "LogEventRequest", + "description": "" }, { "name": "DPOAlignmentConfig", @@ -6213,18 +5669,6 @@ { "name": "UpdateDocumentsRequest", "description": "" - }, - { - "name": "UpdateExperimentRequest", - "description": "" - }, - { - "name": "UpdateRunRequest", - "description": "" - }, - { - "name": "UploadArtifactRequest", - "description": "" } ], "x-tagGroups": [ @@ -6258,8 +5702,6 @@ "AgenticSystemTurnResponseStreamChunk", "AgenticSystemTurnResponseTurnCompletePayload", "AgenticSystemTurnResponseTurnStartPayload", - "Artifact", - "ArtifactType", "Attachment", "BatchChatCompletionRequest", "BatchChatCompletionResponse", @@ -6270,6 +5712,7 @@ "CancelEvaluationJobRequest", "CancelTrainingJobRequest", "ChatCompletionRequest", + "ChatCompletionResponse", "ChatCompletionResponseEvent", "ChatCompletionResponseEventType", "ChatCompletionResponseStreamChunk", @@ -6277,14 +5720,13 @@ "CodeInterpreterToolDefinition", "CompletionMessage", "CompletionRequest", + "CompletionResponse", "CompletionResponseStreamChunk", "CreateAgenticSystemRequest", "CreateAgenticSystemSessionRequest", "CreateAgenticSystemTurnRequest", "CreateDatasetRequest", - "CreateExperimentRequest", "CreateMemoryBankRequest", - "CreateRunRequest", "DPOAlignmentConfig", "DeleteAgenticSystemRequest", "DeleteAgenticSystemSessionRequest", @@ -6302,24 +5744,19 @@ "EvaluationJobArtifactsResponse", "EvaluationJobLogStream", "EvaluationJobStatusResponse", - "Experiment", - "ExperimentStatus", "FinetuningAlgorithm", "FunctionCallToolDefinition", "GetAgenticSystemSessionRequest", "GetDocumentsRequest", - "GetLogsRequest", "InferenceStep", "InsertDocumentsRequest", - "ListArtifactsRequest", - "Log", - "LogMessagesRequest", - "LogMetricsRequest", + "LogEventRequest", + "LogSeverity", "LoraFinetuningConfig", "MemoryBank", "MemoryBankDocument", "MemoryRetrievalStep", - "Metric", + "MetricEvent", "OnViolationAction", "OptimizerConfig", "PhotogenToolDefinition", @@ -6337,7 +5774,6 @@ "RestAPIMethod", "RewardScoreRequest", "RewardScoringResponse", - "Run", "RunShieldResponse", "RunShieldsRequest", "SamplingParams", @@ -6349,7 +5785,11 @@ "ShieldCallStep", "ShieldDefinition", "ShieldResponse", + "SpanEndPayload", + "SpanStartPayload", + "SpanStatus", "StopReason", + "StructuredLogEvent", "SupervisedFineTuneRequest", "SyntheticDataGenerateRequest", "SyntheticDataGenerationResponse", @@ -6365,15 +5805,14 @@ "ToolPromptFormat", "ToolResponse", "ToolResponseMessage", + "Trace", "TrainEvalDataset", "TrainEvalDatasetColumnType", "TrainingConfig", "Turn", "URL", + "UnstructuredLogEvent", "UpdateDocumentsRequest", - "UpdateExperimentRequest", - "UpdateRunRequest", - "UploadArtifactRequest", "UserMessage", "WolframAlphaToolDefinition" ] diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml index 2045e94a0..4d1b27bb7 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml @@ -306,49 +306,6 @@ components: - event_type - turn_id type: object - Artifact: - additionalProperties: false - properties: - created_at: - format: date-time - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - size: - type: integer - type: - $ref: '#/components/schemas/ArtifactType' - required: - - id - - name - - type - - size - - created_at - - metadata - type: object - ArtifactType: - enum: - - model - - dataset - - checkpoint - - plot - - metric - - config - - code - - other - type: string Attachment: additionalProperties: false properties: @@ -511,6 +468,19 @@ components: - model - messages type: object + ChatCompletionResponse: + additionalProperties: false + properties: + completion_message: + $ref: '#/components/schemas/CompletionMessage' + logprobs: + items: + $ref: '#/components/schemas/TokenLogProbs' + type: array + required: + - completion_message + title: Chat completion response. + type: object ChatCompletionResponseEvent: additionalProperties: false properties: @@ -619,6 +589,19 @@ components: - model - content type: object + CompletionResponse: + additionalProperties: false + properties: + completion_message: + $ref: '#/components/schemas/CompletionMessage' + logprobs: + items: + $ref: '#/components/schemas/TokenLogProbs' + type: array + required: + - completion_message + title: Completion response. + type: object CompletionResponseStreamChunk: additionalProperties: false properties: @@ -688,24 +671,6 @@ components: - uuid - dataset type: object - CreateExperimentRequest: - additionalProperties: false - properties: - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - required: - - name - type: object CreateMemoryBankRequest: additionalProperties: false properties: @@ -759,24 +724,6 @@ components: - name - config type: object - CreateRunRequest: - additionalProperties: false - properties: - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - required: - - experiment_id - type: object DPOAlignmentConfig: additionalProperties: false properties: @@ -989,46 +936,6 @@ components: required: - job_uuid type: object - Experiment: - additionalProperties: false - properties: - created_at: - format: date-time - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - status: - $ref: '#/components/schemas/ExperimentStatus' - updated_at: - format: date-time - type: string - required: - - id - - name - - status - - created_at - - updated_at - - metadata - type: object - ExperimentStatus: - enum: - - not_started - - running - - completed - - failed - type: string FinetuningAlgorithm: enum: - full @@ -1084,20 +991,6 @@ components: required: - document_ids type: object - GetLogsRequest: - additionalProperties: false - properties: - filters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - type: object InferenceStep: additionalProperties: false properties: @@ -1137,65 +1030,26 @@ components: - bank_id - documents type: object - ListArtifactsRequest: + LogEventRequest: additionalProperties: false properties: - experiment_id: - type: string + event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' required: - - experiment_id - type: object - Log: - additionalProperties: false - properties: - additional_info: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - level: - type: string - message: - type: string - timestamp: - format: date-time - type: string - required: - - message - - level - - timestamp - - additional_info - type: object - LogMessagesRequest: - additionalProperties: false - properties: - logs: - items: - $ref: '#/components/schemas/Log' - type: array - run_id: - type: string - required: - - logs - type: object - LogMetricsRequest: - additionalProperties: false - properties: - metrics: - items: - $ref: '#/components/schemas/Metric' - type: array - run_id: - type: string - required: - - run_id - - metrics + - event type: object + LogSeverity: + enum: + - verbose + - debug + - info + - warn + - error + - critical + type: string LoraFinetuningConfig: additionalProperties: false properties: @@ -1337,27 +1191,45 @@ components: - memory_bank_ids - inserted_context type: object - Metric: + MetricEvent: additionalProperties: false properties: - name: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + metric: type: string - run_id: + span_id: type: string timestamp: format: date-time type: string + trace_id: + type: string + type: + const: metric + type: string + unit: + type: string value: oneOf: - - type: number - type: integer - - type: string - - type: boolean + - type: number required: - - name - - value + - trace_id + - span_id - timestamp - - run_id + - type + - metric + - value + - unit type: object OnViolationAction: enum: @@ -1690,38 +1562,6 @@ components: title: Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold. type: object - Run: - additionalProperties: false - properties: - ended_at: - format: date-time - type: string - experiment_id: - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - started_at: - format: date-time - type: string - status: - type: string - required: - - id - - experiment_id - - status - - started_at - - metadata - type: object RunShieldResponse: additionalProperties: false properties: @@ -1919,12 +1759,77 @@ components: - shield_type - is_violation type: object + SpanEndPayload: + additionalProperties: false + properties: + status: + $ref: '#/components/schemas/SpanStatus' + type: + const: span_end + type: string + required: + - type + - status + type: object + SpanStartPayload: + additionalProperties: false + properties: + name: + type: string + parent_span_id: + type: string + type: + const: span_start + type: string + required: + - type + - name + type: object + SpanStatus: + enum: + - ok + - error + type: string StopReason: enum: - end_of_turn - end_of_message - out_of_tokens type: string + StructuredLogEvent: + additionalProperties: false + properties: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + payload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + span_id: + type: string + timestamp: + format: date-time + type: string + trace_id: + type: string + type: + const: structured_log + type: string + required: + - trace_id + - span_id + - timestamp + - type + - payload + type: object SupervisedFineTuneRequest: additionalProperties: false properties: @@ -2236,6 +2141,24 @@ components: - tool_name - content type: object + Trace: + additionalProperties: false + properties: + end_time: + format: date-time + type: string + root_span_id: + type: string + start_time: + format: date-time + type: string + trace_id: + type: string + required: + - trace_id + - root_span_id + - start_time + type: object TrainEvalDataset: additionalProperties: false properties: @@ -2341,6 +2264,41 @@ components: format: uri pattern: ^(https?://|file://|data:) type: string + UnstructuredLogEvent: + additionalProperties: false + properties: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + message: + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + span_id: + type: string + timestamp: + format: date-time + type: string + trace_id: + type: string + type: + const: unstructured_log + type: string + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + type: object UpdateDocumentsRequest: additionalProperties: false properties: @@ -2354,77 +2312,6 @@ components: - bank_id - documents type: object - UpdateExperimentRequest: - additionalProperties: false - properties: - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - status: - $ref: '#/components/schemas/ExperimentStatus' - required: - - experiment_id - type: object - UpdateRunRequest: - additionalProperties: false - properties: - ended_at: - format: date-time - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - run_id: - type: string - status: - type: string - required: - - run_id - type: object - UploadArtifactRequest: - additionalProperties: false - properties: - artifact_type: - type: string - content: - contentEncoding: base64 - type: string - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - required: - - experiment_id - - name - - artifact_type - - content - type: object UserMessage: additionalProperties: false properties: @@ -2470,7 +2357,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-09-11 15:30:11.688505" + \ draft and subject to change.\n Generated at 2024-09-11 16:05:23.016090" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -2635,23 +2522,6 @@ paths: description: OK tags: - AgenticSystem - /artifacts/get: - get: - parameters: - - in: query - name: artifact_id - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry /batch_inference/chat_completion: post: parameters: [] @@ -2864,125 +2734,6 @@ paths: description: OK tags: - Evaluations - /experiments/artifacts/get: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ListArtifactsRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry - /experiments/artifacts/upload: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UploadArtifactRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry - /experiments/create: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateExperimentRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/create_run: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateRunRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Run' - description: OK - tags: - - Telemetry - /experiments/get: - get: - parameters: - - in: query - name: experiment_id - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/list: - get: - parameters: [] - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/update: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateExperimentRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry /inference/chat_completion: post: parameters: [] @@ -2997,8 +2748,10 @@ paths: content: text/event-stream: schema: - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' - description: SSE-stream of these events. + oneOf: + - $ref: '#/components/schemas/ChatCompletionResponse' + - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + description: Chat completion response. **OR** SSE-stream of these events. tags: - Inference /inference/completion: @@ -3015,8 +2768,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CompletionResponseStreamChunk' - description: streamed completion response. + oneOf: + - $ref: '#/components/schemas/CompletionResponse' + - $ref: '#/components/schemas/CompletionResponseStreamChunk' + description: Completion response. **OR** streamed completion response. tags: - Inference /inference/embeddings: @@ -3037,43 +2792,6 @@ paths: description: OK tags: - Inference - /logging/get_logs: - post: - parameters: - - in: query - name: query - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetLogsRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Log' - description: OK - tags: - - Telemetry - /logging/log_messages: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogMessagesRequest' - required: true - responses: - '200': - description: OK - tags: - - Telemetry /memory_bank/documents/delete: post: parameters: [] @@ -3355,55 +3073,6 @@ paths: description: OK tags: - RewardScoring - /runs/log_metrics: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogMetricsRequest' - required: true - responses: - '200': - description: OK - tags: - - Telemetry - /runs/metrics: - get: - parameters: - - in: query - name: run_id - required: true - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Metric' - description: OK - tags: - - Telemetry - /runs/update: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateRunRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Run' - description: OK - tags: - - Telemetry /safety/run_shields: post: parameters: [] @@ -3440,22 +3109,53 @@ paths: description: OK tags: - SyntheticDataGeneration + /telemetry/get_trace: + get: + parameters: + - in: query + name: trace_id + required: true + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/Trace' + description: OK + tags: + - Telemetry + /telemetry/log_event: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/LogEventRequest' + required: true + responses: + '200': + description: OK + tags: + - Telemetry security: - Default: [] servers: - url: http://any-hosted-llama-stack.com tags: +- name: SyntheticDataGeneration +- name: Datasets +- name: Evaluations +- name: Safety +- name: Inference +- name: Telemetry +- name: PostTraining +- name: Memory +- name: RewardScoring - name: BatchInference - name: AgenticSystem -- name: Memory -- name: Inference -- name: RewardScoring -- name: Telemetry -- name: Safety -- name: Evaluations -- name: Datasets -- name: SyntheticDataGeneration -- name: PostTraining - description: name: BuiltinTool - description: name: ChatCompletionRequest +- description: 'Chat completion response. + + + ' + name: ChatCompletionResponse - description: 'Chat completion response event. @@ -3541,6 +3246,11 @@ tags: - description: name: CompletionRequest +- description: 'Completion response. + + + ' + name: CompletionResponse - description: 'streamed completion response. @@ -3650,24 +3360,11 @@ tags: - description: name: CreateDatasetRequest -- description: - name: CreateExperimentRequest -- description: - name: Experiment -- description: - name: ExperimentStatus - description: name: CreateMemoryBankRequest - description: name: MemoryBank -- description: - name: CreateRunRequest -- description: - name: Run - description: name: DeleteAgenticSystemRequest @@ -3711,10 +3408,6 @@ tags: - description: name: AgenticSystemStepResponse -- description: - name: Artifact -- description: - name: ArtifactType - description: name: GetDocumentsRequest @@ -3733,12 +3426,8 @@ tags: - description: name: EvaluationJobStatusResponse -- description: - name: GetLogsRequest -- description: - name: Log -- description: - name: Metric +- description: + name: Trace - description: 'Checkpoint created during training runs @@ -3770,15 +3459,26 @@ tags: - description: name: InsertDocumentsRequest -- description: + name: LogSeverity +- description: + name: MetricEvent +- description: + name: SpanEndPayload +- description: - name: ListArtifactsRequest -- description: + name: SpanStatus +- description: - name: LogMessagesRequest -- description: - name: LogMetricsRequest + name: UnstructuredLogEvent +- description: + name: LogEventRequest - description: name: DPOAlignmentConfig @@ -3849,15 +3549,6 @@ tags: - description: name: UpdateDocumentsRequest -- description: - name: UpdateExperimentRequest -- description: - name: UpdateRunRequest -- description: - name: UploadArtifactRequest x-tagGroups: - name: Operations tags: @@ -3885,8 +3576,6 @@ x-tagGroups: - AgenticSystemTurnResponseStreamChunk - AgenticSystemTurnResponseTurnCompletePayload - AgenticSystemTurnResponseTurnStartPayload - - Artifact - - ArtifactType - Attachment - BatchChatCompletionRequest - BatchChatCompletionResponse @@ -3897,6 +3586,7 @@ x-tagGroups: - CancelEvaluationJobRequest - CancelTrainingJobRequest - ChatCompletionRequest + - ChatCompletionResponse - ChatCompletionResponseEvent - ChatCompletionResponseEventType - ChatCompletionResponseStreamChunk @@ -3904,14 +3594,13 @@ x-tagGroups: - CodeInterpreterToolDefinition - CompletionMessage - CompletionRequest + - CompletionResponse - CompletionResponseStreamChunk - CreateAgenticSystemRequest - CreateAgenticSystemSessionRequest - CreateAgenticSystemTurnRequest - CreateDatasetRequest - - CreateExperimentRequest - CreateMemoryBankRequest - - CreateRunRequest - DPOAlignmentConfig - DeleteAgenticSystemRequest - DeleteAgenticSystemSessionRequest @@ -3929,24 +3618,19 @@ x-tagGroups: - EvaluationJobArtifactsResponse - EvaluationJobLogStream - EvaluationJobStatusResponse - - Experiment - - ExperimentStatus - FinetuningAlgorithm - FunctionCallToolDefinition - GetAgenticSystemSessionRequest - GetDocumentsRequest - - GetLogsRequest - InferenceStep - InsertDocumentsRequest - - ListArtifactsRequest - - Log - - LogMessagesRequest - - LogMetricsRequest + - LogEventRequest + - LogSeverity - LoraFinetuningConfig - MemoryBank - MemoryBankDocument - MemoryRetrievalStep - - Metric + - MetricEvent - OnViolationAction - OptimizerConfig - PhotogenToolDefinition @@ -3964,7 +3648,6 @@ x-tagGroups: - RestAPIMethod - RewardScoreRequest - RewardScoringResponse - - Run - RunShieldResponse - RunShieldsRequest - SamplingParams @@ -3976,7 +3659,11 @@ x-tagGroups: - ShieldCallStep - ShieldDefinition - ShieldResponse + - SpanEndPayload + - SpanStartPayload + - SpanStatus - StopReason + - StructuredLogEvent - SupervisedFineTuneRequest - SyntheticDataGenerateRequest - SyntheticDataGenerationResponse @@ -3992,14 +3679,13 @@ x-tagGroups: - ToolPromptFormat - ToolResponse - ToolResponseMessage + - Trace - TrainEvalDataset - TrainEvalDatasetColumnType - TrainingConfig - Turn - URL + - UnstructuredLogEvent - UpdateDocumentsRequest - - UpdateExperimentRequest - - UpdateRunRequest - - UploadArtifactRequest - UserMessage - WolframAlphaToolDefinition