diff --git a/.gitignore b/.gitignore
index 24ce79959..421ff4db1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ Package.resolved
 .vscode
 _build
 docs/src
+pyrightconfig.json
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index a82b3db76..3344f462a 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -18,10 +18,6 @@ import yaml
 
 from llama_models import schema_utils
 
-from .pyopenapi.options import Options
-from .pyopenapi.specification import Info, Server
-from .pyopenapi.utility import Specification
-
 # We do some monkey-patching to ensure our definitions only use the minimal
 # (json_schema_type, webmethod) definitions from the llama_models package. For
 # generation though, we need the full definitions and implementations from the
@@ -31,11 +27,13 @@ from .strong_typing.schema import json_schema_type
 
 schema_utils.json_schema_type = json_schema_type
 
-# this line needs to be here to ensure json_schema_type has been altered before
-# the imports use the annotation
 from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
 from llama_stack.distribution.stack import LlamaStack  # noqa: E402
 
+from .pyopenapi.options import Options  # noqa: E402
+from .pyopenapi.specification import Info, Server  # noqa: E402
+from .pyopenapi.utility import Specification  # noqa: E402
+
 
 def main(output_dir: str):
     output_dir = Path(output_dir)
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 9a9a29439..cb7c6c3af 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1067,7 +1067,10 @@
                 "content": {
                     "application/json": {
                         "schema": {
-                            "$ref": "#/components/schemas/SpanWithChildren"
+                            "type": "object",
+                            "additionalProperties": {
+                                "$ref": "#/components/schemas/SpanWithStatus"
+                            }
                         }
                     }
                 }
@@ -1123,45 +1126,14 @@
                 "content": {
                     "application/json": {
                         "schema": {
-                            "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
-                        }
-                    }
-                }
-            }
-        },
-        "tags": [
-            "PostTraining (Coming Soon)"
-        ],
-        "parameters": [
-            {
-                "name": "job_uuid",
-                "in": "query",
-                "required": true,
-                "schema": {
-                    "type": "string"
-                }
-            },
-            {
-                "name": "X-LlamaStack-ProviderData",
-                "in": "header",
-                "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                "required": false,
-                "schema": {
-                    "type": "string"
-                }
-            }
-        ]
-    }
-},
-"/alpha/post-training/job/logs": {
-    "get": {
-        "responses": {
-            "200": {
-                "description": "OK",
-                "content": {
-                    "application/json": {
-                        "schema": {
-                            "$ref": "#/components/schemas/PostTrainingJobLogStream"
+                            "oneOf": [
+                                {
+                                    "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
+                                },
+                                {
+                                    "type": "null"
+                                }
+                            ]
                         }
                     }
                 }
@@ -1199,7 +1171,14 @@
                 "content": {
                     "application/json": {
                         "schema": {
-                            "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+                            "oneOf": [
+                                {
+                                    "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+                                },
+                                {
+                                    "type": "null"
+                                }
+                            ]
                         }
                     }
                 }
@@ -5459,6 +5438,10 @@
                 "chunk_size_in_tokens": {
                     "type": "integer"
                 },
+                "embedding_dimension": {
+                    "type": "integer",
+                    "default": 384
+                },
                 "overlap_size_in_tokens": {
                     "type": "integer"
                 }
@@ -5807,6 +5790,10 @@
                         }
                     ]
                 }
+            },
+            "model_type": {
+                "$ref": "#/components/schemas/ModelType",
+                "default": "llm"
             }
         },
         "additionalProperties": false,
@@ -5815,7 +5802,15 @@
             "provider_resource_id",
             "provider_id",
             "type",
-            "metadata"
+            "metadata",
+            "model_type"
+        ]
+    },
+    "ModelType": {
+        "type": "string",
+        "enum": [
+            "llm",
"embedding" ] }, "PaginatedRowsResult": { @@ -6146,7 +6141,7 @@ "error" ] }, - "SpanWithChildren": { + "SpanWithStatus": { "type": "object", "properties": { "span_id": { @@ -6194,12 +6189,6 @@ ] } }, - "children": { - "type": "array", - "items": { - "$ref": "#/components/schemas/SpanWithChildren" - } - }, "status": { "$ref": "#/components/schemas/SpanStatus" } @@ -6209,8 +6198,7 @@ "span_id", "trace_id", "name", - "start_time", - "children" + "start_time" ] }, "Checkpoint": { @@ -6236,31 +6224,11 @@ ], "title": "Artifacts of a finetuning job." }, - "PostTrainingJobLogStream": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - }, - "log_lines": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "log_lines" - ], - "title": "Stream of logs from a finetuning job." - }, - "PostTrainingJobStatus": { + "JobStatus": { "type": "string", "enum": [ - "running", "completed", + "in_progress", "failed", "scheduled" ] @@ -6272,7 +6240,7 @@ "type": "string" }, "status": { - "$ref": "#/components/schemas/PostTrainingJobStatus" + "$ref": "#/components/schemas/JobStatus" }, "scheduled_at": { "type": "string", @@ -6456,13 +6424,6 @@ "job_id" ] }, - "JobStatus": { - "type": "string", - "enum": [ - "completed", - "in_progress" - ] - }, "ProviderInfo": { "type": "object", "properties": { @@ -6796,39 +6757,89 @@ "gamma" ] }, + "DataConfig": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string" + }, + "batch_size": { + "type": "integer" + }, + "shuffle": { + "type": "boolean" + }, + "validation_dataset_id": { + "type": "string" + }, + "packed": { + "type": "boolean", + "default": false + }, + "train_on_input": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "batch_size", + "shuffle" + ] + }, + "EfficiencyConfig": { + "type": "object", + "properties": { + "enable_activation_checkpointing": { + "type": "boolean", + "default": false + }, + "enable_activation_offloading": { + "type": "boolean", + "default": false + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean", + "default": false + }, + "fsdp_cpu_offload": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": false + }, "OptimizerConfig": { "type": "object", "properties": { "optimizer_type": { - "type": "string", - "enum": [ - "adam", - "adamw", - "sgd" - ] + "$ref": "#/components/schemas/OptimizerType" }, "lr": { "type": "number" }, - "lr_min": { - "type": "number" - }, "weight_decay": { "type": "number" + }, + "num_warmup_steps": { + "type": "integer" } }, "additionalProperties": false, "required": [ "optimizer_type", "lr", - "lr_min", - "weight_decay" + "weight_decay", + "num_warmup_steps" ] }, - "RLHFAlgorithm": { + "OptimizerType": { "type": "string", "enum": [ - "dpo" + "adam", + "adamw", + "sgd" ] }, "TrainingConfig": { @@ -6837,34 +6848,33 @@ "n_epochs": { "type": "integer" }, - "batch_size": { + "max_steps_per_epoch": { "type": "integer" }, - "shuffle": { - "type": "boolean" - }, - "n_iters": { + "gradient_accumulation_steps": { "type": "integer" }, - "enable_activation_checkpointing": { - "type": "boolean" + "data_config": { + "$ref": "#/components/schemas/DataConfig" }, - "memory_efficient_fsdp_wrap": { - "type": "boolean" + "optimizer_config": { + "$ref": "#/components/schemas/OptimizerConfig" }, - "fsdp_cpu_offload": { - "type": "boolean" + "efficiency_config": { + "$ref": "#/components/schemas/EfficiencyConfig" + }, + 
"dtype": { + "type": "string", + "default": "bf16" } }, "additionalProperties": false, "required": [ "n_epochs", - "batch_size", - "shuffle", - "n_iters", - "enable_activation_checkpointing", - "memory_efficient_fsdp_wrap", - "fsdp_cpu_offload" + "max_steps_per_epoch", + "gradient_accumulation_steps", + "data_config", + "optimizer_config" ] }, "PreferenceOptimizeRequest": { @@ -6874,23 +6884,11 @@ "type": "string" }, "finetuned_model": { - "$ref": "#/components/schemas/URL" - }, - "dataset_id": { "type": "string" }, - "validation_dataset_id": { - "type": "string" - }, - "algorithm": { - "$ref": "#/components/schemas/RLHFAlgorithm" - }, "algorithm_config": { "$ref": "#/components/schemas/DPOAlignmentConfig" }, - "optimizer_config": { - "$ref": "#/components/schemas/OptimizerConfig" - }, "training_config": { "$ref": "#/components/schemas/TrainingConfig" }, @@ -6949,11 +6947,7 @@ "required": [ "job_uuid", "finetuned_model", - "dataset_id", - "validation_dataset_id", - "algorithm", "algorithm_config", - "optimizer_config", "training_config", "hyperparam_search_config", "logger_config" @@ -7645,6 +7639,9 @@ } ] } + }, + "model_type": { + "$ref": "#/components/schemas/ModelType" } }, "additionalProperties": false, @@ -8140,49 +8137,14 @@ "results" ] }, - "DoraFinetuningConfig": { - "type": "object", - "properties": { - "lora_attn_modules": { - "type": "array", - "items": { - "type": "string" - } - }, - "apply_lora_to_mlp": { - "type": "boolean" - }, - "apply_lora_to_output": { - "type": "boolean" - }, - "rank": { - "type": "integer" - }, - "alpha": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "lora_attn_modules", - "apply_lora_to_mlp", - "apply_lora_to_output", - "rank", - "alpha" - ] - }, - "FinetuningAlgorithm": { - "type": "string", - "enum": [ - "full", - "lora", - "qlora", - "dora" - ] - }, "LoraFinetuningConfig": { "type": "object", "properties": { + "type": { + "type": "string", + "const": "LoRA", + "default": "LoRA" + }, "lora_attn_modules": { "type": "array", "items": { @@ -8200,10 +8162,19 @@ }, "alpha": { "type": "integer" + }, + "use_dora": { + "type": "boolean", + "default": false + }, + "quantize_base": { + "type": "boolean", + "default": false } }, "additionalProperties": false, "required": [ + "type", "lora_attn_modules", "apply_lora_to_mlp", "apply_lora_to_output", @@ -8211,35 +8182,26 @@ "alpha" ] }, - "QLoraFinetuningConfig": { + "QATFinetuningConfig": { "type": "object", "properties": { - "lora_attn_modules": { - "type": "array", - "items": { - "type": "string" - } + "type": { + "type": "string", + "const": "QAT", + "default": "QAT" }, - "apply_lora_to_mlp": { - "type": "boolean" + "quantizer_name": { + "type": "string" }, - "apply_lora_to_output": { - "type": "boolean" - }, - "rank": { - "type": "integer" - }, - "alpha": { + "group_size": { "type": "integer" } }, "additionalProperties": false, "required": [ - "lora_attn_modules", - "apply_lora_to_mlp", - "apply_lora_to_output", - "rank", - "alpha" + "type", + "quantizer_name", + "group_size" ] }, "SupervisedFineTuneRequest": { @@ -8248,34 +8210,6 @@ "job_uuid": { "type": "string" }, - "model": { - "type": "string" - }, - "dataset_id": { - "type": "string" - }, - "validation_dataset_id": { - "type": "string" - }, - "algorithm": { - "$ref": "#/components/schemas/FinetuningAlgorithm" - }, - "algorithm_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/LoraFinetuningConfig" - }, - { - "$ref": "#/components/schemas/QLoraFinetuningConfig" - }, - { - "$ref": 
"#/components/schemas/DoraFinetuningConfig" - } - ] - }, - "optimizer_config": { - "$ref": "#/components/schemas/OptimizerConfig" - }, "training_config": { "$ref": "#/components/schemas/TrainingConfig" }, @@ -8328,20 +8262,31 @@ } ] } + }, + "model": { + "type": "string" + }, + "checkpoint_dir": { + "type": "string" + }, + "algorithm_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ] } }, "additionalProperties": false, "required": [ "job_uuid", - "model", - "dataset_id", - "validation_dataset_id", - "algorithm", - "algorithm_config", - "optimizer_config", "training_config", "hyperparam_search_config", - "logger_config" + "logger_config", + "model" ] }, "SyntheticDataGenerateRequest": { @@ -8658,6 +8603,10 @@ "name": "DPOAlignmentConfig", "description": "" }, + { + "name": "DataConfig", + "description": "" + }, { "name": "Dataset", "description": "" @@ -8677,8 +8626,8 @@ "description": "" }, { - "name": "DoraFinetuningConfig", - "description": "" + "name": "EfficiencyConfig", + "description": "" }, { "name": "EmbeddingsRequest", @@ -8706,10 +8655,6 @@ "name": "EvaluateRowsRequest", "description": "" }, - { - "name": "FinetuningAlgorithm", - "description": "" - }, { "name": "FunctionCallToolDefinition", "description": "" @@ -8826,6 +8771,10 @@ "name": "ModelCandidate", "description": "" }, + { + "name": "ModelType", + "description": "" + }, { "name": "Models" }, @@ -8833,6 +8782,10 @@ "name": "OptimizerConfig", "description": "" }, + { + "name": "OptimizerType", + "description": "" + }, { "name": "PaginatedRowsResult", "description": "" @@ -8852,14 +8805,6 @@ "name": "PostTrainingJobArtifactsResponse", "description": "Artifacts of a finetuning job.\n\n" }, - { - "name": "PostTrainingJobLogStream", - "description": "Stream of logs from a finetuning job.\n\n" - }, - { - "name": "PostTrainingJobStatus", - "description": "" - }, { "name": "PostTrainingJobStatusResponse", "description": "Status of a finetuning job.\n\n" @@ -8873,8 +8818,8 @@ "description": "" }, { - "name": "QLoraFinetuningConfig", - "description": "" + "name": "QATFinetuningConfig", + "description": "" }, { "name": "QueryCondition", @@ -8900,10 +8845,6 @@ "name": "QueryTracesRequest", "description": "" }, - { - "name": "RLHFAlgorithm", - "description": "" - }, { "name": "RegexParserScoringFnParams", "description": "" @@ -9041,8 +8982,8 @@ "description": "" }, { - "name": "SpanWithChildren", - "description": "" + "name": "SpanWithStatus", + "description": "" }, { "name": "StopReason", @@ -9237,16 +9178,16 @@ "CreateAgentSessionRequest", "CreateAgentTurnRequest", "DPOAlignmentConfig", + "DataConfig", "Dataset", "DeleteAgentsRequest", "DeleteAgentsSessionRequest", - "DoraFinetuningConfig", + "EfficiencyConfig", "EmbeddingsRequest", "EmbeddingsResponse", "EvalTask", "EvaluateResponse", "EvaluateRowsRequest", - "FinetuningAlgorithm", "FunctionCallToolDefinition", "GetAgentsSessionRequest", "GetSpanTreeRequest", @@ -9273,24 +9214,23 @@ "MetricEvent", "Model", "ModelCandidate", + "ModelType", "OptimizerConfig", + "OptimizerType", "PaginatedRowsResult", "PhotogenToolDefinition", "PostTrainingJob", "PostTrainingJobArtifactsResponse", - "PostTrainingJobLogStream", - "PostTrainingJobStatus", "PostTrainingJobStatusResponse", "PreferenceOptimizeRequest", "ProviderInfo", - "QLoraFinetuningConfig", + "QATFinetuningConfig", "QueryCondition", "QueryConditionOp", "QueryDocumentsRequest", "QueryDocumentsResponse", "QuerySpansRequest", 
"QueryTracesRequest", - "RLHFAlgorithm", "RegexParserScoringFnParams", "RegisterDatasetRequest", "RegisterEvalTaskRequest", @@ -9322,7 +9262,7 @@ "SpanEndPayload", "SpanStartPayload", "SpanStatus", - "SpanWithChildren", + "SpanWithStatus", "StopReason", "StructuredLogEvent", "SupervisedFineTuneRequest", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index a1cd08387..d20c623b3 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -761,6 +761,28 @@ components: - epsilon - gamma type: object + DataConfig: + additionalProperties: false + properties: + batch_size: + type: integer + dataset_id: + type: string + packed: + default: false + type: boolean + shuffle: + type: boolean + train_on_input: + default: false + type: boolean + validation_dataset_id: + type: string + required: + - dataset_id + - batch_size + - shuffle + type: object Dataset: additionalProperties: false properties: @@ -908,27 +930,21 @@ components: - agent_id - session_id type: object - DoraFinetuningConfig: + EfficiencyConfig: additionalProperties: false properties: - alpha: - type: integer - apply_lora_to_mlp: + enable_activation_checkpointing: + default: false type: boolean - apply_lora_to_output: + enable_activation_offloading: + default: false + type: boolean + fsdp_cpu_offload: + default: false + type: boolean + memory_efficient_fsdp_wrap: + default: false type: boolean - lora_attn_modules: - items: - type: string - type: array - rank: - type: integer - required: - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha type: object EmbeddingsRequest: additionalProperties: false @@ -1054,13 +1070,6 @@ components: - scoring_functions - task_config type: object - FinetuningAlgorithm: - enum: - - full - - lora - - qlora - - dora - type: string FunctionCallToolDefinition: additionalProperties: false properties: @@ -1230,6 +1239,8 @@ components: enum: - completed - in_progress + - failed + - scheduled type: string KeyValueMemoryBank: additionalProperties: false @@ -1358,9 +1369,20 @@ components: items: type: string type: array + quantize_base: + default: false + type: boolean rank: type: integer + type: + const: LoRA + default: LoRA + type: string + use_dora: + default: false + type: boolean required: + - type - lora_attn_modules - apply_lora_to_mlp - apply_lora_to_output @@ -1621,6 +1643,9 @@ components: - type: array - type: object type: object + model_type: + $ref: '#/components/schemas/ModelType' + default: llm provider_id: type: string provider_resource_id: @@ -1635,6 +1660,7 @@ components: - provider_id - type - metadata + - model_type type: object ModelCandidate: additionalProperties: false @@ -1654,27 +1680,34 @@ components: - model - sampling_params type: object + ModelType: + enum: + - llm + - embedding + type: string OptimizerConfig: additionalProperties: false properties: lr: type: number - lr_min: - type: number + num_warmup_steps: + type: integer optimizer_type: - enum: - - adam - - adamw - - sgd - type: string + $ref: '#/components/schemas/OptimizerType' weight_decay: type: number required: - optimizer_type - lr - - lr_min - weight_decay + - num_warmup_steps type: object + OptimizerType: + enum: + - adam + - adamw + - sgd + type: string PaginatedRowsResult: additionalProperties: false properties: @@ -1740,27 +1773,6 @@ components: - checkpoints title: Artifacts of a finetuning job. 
     type: object
-  PostTrainingJobLogStream:
-    additionalProperties: false
-    properties:
-      job_uuid:
-        type: string
-      log_lines:
-        items:
-          type: string
-        type: array
-    required:
-    - job_uuid
-    - log_lines
-    title: Stream of logs from a finetuning job.
-    type: object
-  PostTrainingJobStatus:
-    enum:
-    - running
-    - completed
-    - failed
-    - scheduled
-    type: string
   PostTrainingJobStatusResponse:
     additionalProperties: false
     properties:
@@ -1790,7 +1802,7 @@
         format: date-time
         type: string
       status:
-        $ref: '#/components/schemas/PostTrainingJobStatus'
+        $ref: '#/components/schemas/JobStatus'
     required:
     - job_uuid
     - status
@@ -1800,14 +1812,10 @@
   PreferenceOptimizeRequest:
     additionalProperties: false
     properties:
-      algorithm:
-        $ref: '#/components/schemas/RLHFAlgorithm'
       algorithm_config:
         $ref: '#/components/schemas/DPOAlignmentConfig'
-      dataset_id:
-        type: string
       finetuned_model:
-        $ref: '#/components/schemas/URL'
+        type: string
       hyperparam_search_config:
         additionalProperties:
           oneOf:
@@ -1830,20 +1838,12 @@
           - type: array
          - type: object
         type: object
-      optimizer_config:
-        $ref: '#/components/schemas/OptimizerConfig'
       training_config:
         $ref: '#/components/schemas/TrainingConfig'
-      validation_dataset_id:
-        type: string
     required:
     - job_uuid
     - finetuned_model
-    - dataset_id
-    - validation_dataset_id
-    - algorithm
     - algorithm_config
-    - optimizer_config
     - training_config
     - hyperparam_search_config
     - logger_config
     type: object
   ProviderInfo:
     additionalProperties: false
     properties:
@@ -1859,27 +1859,21 @@
     - provider_id
     - provider_type
     type: object
-  QLoraFinetuningConfig:
+  QATFinetuningConfig:
     additionalProperties: false
     properties:
-      alpha:
-        type: integer
-      apply_lora_to_mlp:
-        type: boolean
-      apply_lora_to_output:
-        type: boolean
-      lora_attn_modules:
-        items:
-          type: string
-        type: array
-      rank:
+      group_size:
         type: integer
+      quantizer_name:
+        type: string
+      type:
+        const: QAT
+        default: QAT
+        type: string
     required:
-    - lora_attn_modules
-    - apply_lora_to_mlp
-    - apply_lora_to_output
-    - rank
-    - alpha
+    - type
+    - quantizer_name
+    - group_size
     type: object
   QueryCondition:
     additionalProperties: false
     properties:
@@ -2003,10 +1997,6 @@
         type: string
       type: array
     type: object
-  RLHFAlgorithm:
-    enum:
-    - dpo
-    type: string
   RegexParserScoringFnParams:
     additionalProperties: false
     properties:
@@ -2209,6 +2199,8 @@
       type: object
       model_id:
         type: string
+      model_type:
+        $ref: '#/components/schemas/ModelType'
       provider_id:
         type: string
       provider_model_id:
         type: string
@@ -2941,7 +2933,7 @@
     - ok
     - error
     type: string
-  SpanWithChildren:
+  SpanWithStatus:
     additionalProperties: false
     properties:
       attributes:
@@ -2954,10 +2946,6 @@
         - type: array
         - type: object
       type: object
-      children:
-        items:
-          $ref: '#/components/schemas/SpanWithChildren'
-        type: array
       end_time:
         format: date-time
         type: string
@@ -2979,7 +2967,6 @@
     - trace_id
     - name
     - start_time
-    - children
     type: object
   StopReason:
     enum:
@@ -3025,14 +3012,11 @@
   SupervisedFineTuneRequest:
     additionalProperties: false
     properties:
-      algorithm:
-        $ref: '#/components/schemas/FinetuningAlgorithm'
       algorithm_config:
         oneOf:
         - $ref: '#/components/schemas/LoraFinetuningConfig'
-        - $ref: '#/components/schemas/QLoraFinetuningConfig'
-        - $ref: '#/components/schemas/DoraFinetuningConfig'
-      dataset_id:
+        - $ref: '#/components/schemas/QATFinetuningConfig'
+      checkpoint_dir:
         type: string
       hyperparam_search_config:
         additionalProperties:
@@ -3058,23 +3042,14 @@
         type: object
       model:
         type: string
-      optimizer_config:
-        $ref: '#/components/schemas/OptimizerConfig'
       training_config:
         $ref: '#/components/schemas/TrainingConfig'
-      validation_dataset_id:
-        type: string
     required:
     - job_uuid
-    - model
-    - dataset_id
-    - validation_dataset_id
-    - algorithm
-    - algorithm_config
-    - optimizer_config
     - training_config
     - hyperparam_search_config
     - logger_config
+    - model
     type: object
   SyntheticDataGenerateRequest:
     additionalProperties: false
     properties:
@@ -3384,28 +3359,27 @@
   TrainingConfig:
     additionalProperties: false
     properties:
-      batch_size:
+      data_config:
+        $ref: '#/components/schemas/DataConfig'
+      dtype:
+        default: bf16
+        type: string
+      efficiency_config:
+        $ref: '#/components/schemas/EfficiencyConfig'
+      gradient_accumulation_steps:
+        type: integer
+      max_steps_per_epoch:
         type: integer
-      enable_activation_checkpointing:
-        type: boolean
-      fsdp_cpu_offload:
-        type: boolean
-      memory_efficient_fsdp_wrap:
-        type: boolean
       n_epochs:
         type: integer
-      n_iters:
-        type: integer
-      shuffle:
-        type: boolean
+      optimizer_config:
+        $ref: '#/components/schemas/OptimizerConfig'
     required:
     - n_epochs
-    - batch_size
-    - shuffle
-    - n_iters
-    - enable_activation_checkpointing
-    - memory_efficient_fsdp_wrap
-    - fsdp_cpu_offload
+    - max_steps_per_epoch
+    - gradient_accumulation_steps
+    - data_config
+    - optimizer_config
     type: object
   Turn:
     additionalProperties: false
@@ -3548,6 +3522,9 @@
     properties:
       chunk_size_in_tokens:
         type: integer
+      embedding_dimension:
+        default: 384
+        type: integer
      embedding_model:
         type: string
       identifier:
@@ -4601,7 +4578,9 @@
           content:
             application/json:
              schema:
-                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+                oneOf:
+                - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+                - type: 'null'
          description: OK
       tags:
       - PostTraining (Coming Soon)
@@ -4626,30 +4605,6 @@
          description: OK
       tags:
       - PostTraining (Coming Soon)
-  /alpha/post-training/job/logs:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/PostTrainingJobLogStream'
-          description: OK
-      tags:
-      - PostTraining (Coming Soon)
   /alpha/post-training/job/status:
     get:
       parameters:
@@ -4670,7 +4625,9 @@
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+                oneOf:
+                - $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+                - type: 'null'
          description: OK
       tags:
       - PostTraining (Coming Soon)
@@ -5054,7 +5011,9 @@
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/SpanWithChildren'
+                additionalProperties:
+                  $ref: '#/components/schemas/SpanWithStatus'
+                type: object
          description: OK
       tags:
       - Telemetry
@@ -5290,6 +5249,8 @@ tags:
 - description:
   name: DPOAlignmentConfig
+- description:
+  name: DataConfig
 - description:
   name: Dataset
 - name: DatasetIO
@@ -5300,9 +5261,9 @@ tags:
 - description:
   name: DeleteAgentsSessionRequest
-- description:
-  name: DoraFinetuningConfig
+- description:
+  name: EfficiencyConfig
 - description:
   name: EmbeddingsRequest
@@ -5319,9 +5280,6 @@ tags:
 - description:
   name: EvaluateRowsRequest
-- description:
-  name: FinetuningAlgorithm
 - description:
   name: FunctionCallToolDefinition
@@ -5395,10 +5353,14 @@ tags:
   name: Model
 - description:
   name: ModelCandidate
+- description:
+  name: ModelType
 - name: Models
 - description:
   name: OptimizerConfig
+- description:
+  name: OptimizerType
 - description:
   name: PaginatedRowsResult
@@ -5415,14 +5377,6 @@ tags:
 
     '
   name: PostTrainingJobArtifactsResponse
-- description: 'Stream of logs from a finetuning job.
-
-
-    '
-  name: PostTrainingJobLogStream
-- description:
-  name: PostTrainingJobStatus
 - description: 'Status of a finetuning job.
@@ -5434,9 +5388,9 @@ tags:
   name: PreferenceOptimizeRequest
 - description:
   name: ProviderInfo
-- description:
-  name: QLoraFinetuningConfig
+- description:
+  name: QATFinetuningConfig
 - description:
   name: QueryCondition
 - description:
   name: QueryTracesRequest
-- description:
-  name: RLHFAlgorithm
 - description:
   name: RegexParserScoringFnParams
@@ -5545,9 +5497,8 @@ tags:
   name: SpanStartPayload
 - description:
   name: SpanStatus
-- description:
-  name: SpanWithChildren
+- description:
+  name: SpanWithStatus
 - description:
   name: StopReason
 - description:
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ ... @@
-    ) -> SpanWithChildren: ...
+    ) -> Dict[str, SpanWithStatus]: ...
 
     @webmethod(route="/telemetry/query-spans", method="POST")
     async def query_spans(
diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
index 2e4a778e4..d7229f508 100644
--- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
@@ -243,7 +243,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
         span_id: str,
         attributes_to_return: Optional[List[str]] = None,
         max_depth: Optional[int] = None,
-    ) -> SpanWithChildren:
+    ) -> Dict[str, SpanWithStatus]:
         return await self.trace_store.get_span_tree(
             span_id=span_id,
             attributes_to_return=attributes_to_return,
diff --git a/llama_stack/providers/utils/telemetry/dataset_mixin.py b/llama_stack/providers/utils/telemetry/dataset_mixin.py
index 7a59801f4..bf5e79c3d 100644
--- a/llama_stack/providers/utils/telemetry/dataset_mixin.py
+++ b/llama_stack/providers/utils/telemetry/dataset_mixin.py
@@ -7,7 +7,7 @@
 from typing import List, Optional
 
 from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.telemetry import QueryCondition, Span, SpanWithChildren
+from llama_stack.apis.telemetry import QueryCondition, Span
 
 
 class TelemetryDatasetMixin:
@@ -53,19 +53,18 @@ class TelemetryDatasetMixin:
         spans = []
 
         for trace in traces:
-            span_tree = await self.get_span_tree(
+            spans_by_id = await self.get_span_tree(
                 span_id=trace.root_span_id,
                 attributes_to_return=attributes_to_return,
                 max_depth=max_depth,
             )
 
-            def extract_spans(span: SpanWithChildren) -> List[Span]:
-                result = []
+            for span in spans_by_id.values():
                 if span.attributes and all(
                     attr in span.attributes and span.attributes[attr] is not None
                     for attr in attributes_to_return
                 ):
-                    result.append(
+                    spans.append(
                         Span(
                             trace_id=trace.root_span_id,
                             span_id=span.span_id,
                             ...
                         )
                     )
 
-            for child in span.children:
-                result.extend(extract_spans(child))
-
-            return result
-
-        spans.extend(extract_spans(span_tree))
-
         return spans
diff --git a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
index 8d9035216..b0c3f7868 100644
--- a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
+++ b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
@@ -6,11 +6,11 @@
 
 import json
 from datetime import datetime
-from typing import List, Optional, Protocol
+from typing import Dict, List, Optional, Protocol
 
 import aiosqlite
 
-from llama_stack.apis.telemetry import QueryCondition, SpanWithChildren, Trace
+from llama_stack.apis.telemetry import QueryCondition, SpanWithStatus, Trace
 
 
 class TraceStore(Protocol):
@@ -27,7 +27,7 @@ class TraceStore(Protocol):
         span_id: str,
         attributes_to_return: Optional[List[str]] = None,
         max_depth: Optional[int] = None,
-    ) -> SpanWithChildren: ...
+    ) -> Dict[str, SpanWithStatus]: ...
 
 
 class SQLiteTraceStore(TraceStore):
@@ -114,7 +114,7 @@ class SQLiteTraceStore(TraceStore):
         span_id: str,
         attributes_to_return: Optional[List[str]] = None,
         max_depth: Optional[int] = None,
-    ) -> SpanWithChildren:
+    ) -> Dict[str, SpanWithStatus]:
         # Build the attributes selection
         attributes_select = "s.attributes"
         if attributes_to_return:
@@ -143,6 +143,7 @@ class SQLiteTraceStore(TraceStore):
             ORDER BY depth, start_time
         """
 
+        spans_by_id = {}
         async with aiosqlite.connect(self.conn_string) as conn:
             conn.row_factory = aiosqlite.Row
             async with conn.execute(query, (span_id, max_depth, max_depth)) as cursor:
@@ -151,12 +152,8 @@ class SQLiteTraceStore(TraceStore):
                 if not rows:
                     raise ValueError(f"Span {span_id} not found")
 
-                # Build span tree
-                spans_by_id = {}
-                root_span = None
-
                 for row in rows:
-                    span = SpanWithChildren(
+                    span = SpanWithStatus(
                         span_id=row["span_id"],
                         trace_id=row["trace_id"],
                         parent_span_id=row["parent_span_id"],
@@ -165,14 +162,8 @@ class SQLiteTraceStore(TraceStore):
                         end_time=datetime.fromisoformat(row["end_time"]),
                         attributes=json.loads(row["filtered_attributes"]),
                         status=row["status"].lower(),
-                        children=[],
                     )
                     spans_by_id[span.span_id] = span
 
-                    if span.span_id == span_id:
-                        root_span = span
-                    elif span.parent_span_id in spans_by_id:
-                        spans_by_id[span.parent_span_id].children.append(span)
-
-            return root_span
+        return spans_by_id
diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py
index 938d333fa..67054da90 100644
--- a/llama_stack/providers/utils/telemetry/trace_protocol.py
+++ b/llama_stack/providers/utils/telemetry/trace_protocol.py
@@ -41,8 +41,6 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
     """
 
     def trace_method(method: Callable) -> Callable:
-        from llama_stack.providers.utils.telemetry import tracing
-
         is_async = asyncio.iscoroutinefunction(method)
         is_async_gen = inspect.isasyncgenfunction(method)
@@ -77,6 +75,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
         async def async_gen_wrapper(
             self: Any, *args: Any, **kwargs: Any
         ) -> AsyncGenerator:
+            from llama_stack.providers.utils.telemetry import tracing
+
             class_name, method_name, span_attributes = create_span_context(
                 self, *args, **kwargs
             )
@@ -92,6 +92,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
 
         @wraps(method)
         async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+            from llama_stack.providers.utils.telemetry import tracing
+
             class_name, method_name, span_attributes = create_span_context(
                 self, *args, **kwargs
             )
@@ -107,6 +109,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
 
         @wraps(method)
         def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+            from llama_stack.providers.utils.telemetry import tracing
+
             class_name, method_name, span_attributes = create_span_context(
                 self, *args, **kwargs
             )
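
Two usage notes on the API changes above; both snippets are sketches under the schemas shown in this diff, not part of the patch itself.

First, `get_span_tree` now returns a flat `Dict[str, SpanWithStatus]` keyed by span ID instead of a recursive `SpanWithChildren` tree, so callers that still need the hierarchy rebuild it from `parent_span_id` (the rewritten loop in `dataset_mixin.py` simply iterates `spans_by_id.values()`). A minimal reconstruction sketch; `group_children` is a hypothetical helper that assumes only the `SpanWithStatus` fields visible in this diff:

    from collections import defaultdict
    from typing import Dict, List, Optional

    def group_children(
        spans_by_id: Dict[str, "SpanWithStatus"],
    ) -> Dict[Optional[str], List["SpanWithStatus"]]:
        # Index each span under its parent's span ID; the root span
        # (parent_span_id is None) lands under the None key.
        children: Dict[Optional[str], List["SpanWithStatus"]] = defaultdict(list)
        for span in spans_by_id.values():
            children[span.parent_span_id].append(span)
        return children

A depth-first walk starting from the root entry then recovers the same nesting the removed `extract_spans` recursion used to produce, without the API shipping a nested structure.

Second, `SupervisedFineTuneRequest` is reshaped: the old top-level `dataset_id`, `validation_dataset_id`, `algorithm`, and `optimizer_config` fields move inside `TrainingConfig` (as `data_config` and `optimizer_config`), and the `FinetuningAlgorithm` enum is replaced by a tagged union on `algorithm_config.type` (`LoRA` or `QAT`). Under the new schemas, a request body might look like the following, with all values purely illustrative:

    request_body = {
        "job_uuid": "job-1234",
        "model": "Llama3.2-3B-Instruct",
        "algorithm_config": {
            "type": "LoRA",
            "lora_attn_modules": ["q_proj", "v_proj"],
            "apply_lora_to_mlp": False,
            "apply_lora_to_output": False,
            "rank": 8,
            "alpha": 16,
        },
        "training_config": {
            "n_epochs": 1,
            "max_steps_per_epoch": 100,
            "gradient_accumulation_steps": 1,
            "data_config": {"dataset_id": "alpaca", "batch_size": 4, "shuffle": True},
            "optimizer_config": {
                "optimizer_type": "adamw",
                "lr": 3e-4,
                "weight_decay": 0.01,
                "num_warmup_steps": 10,
            },
        },
        "hyperparam_search_config": {},
        "logger_config": {},
    }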