diff --git a/.gitignore b/.gitignore
index 24ce79959..421ff4db1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ Package.resolved
.vscode
_build
docs/src
+pyrightconfig.json
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index a82b3db76..3344f462a 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -18,10 +18,6 @@ import yaml
from llama_models import schema_utils
-from .pyopenapi.options import Options
-from .pyopenapi.specification import Info, Server
-from .pyopenapi.utility import Specification
-
# We do some monkey-patching to ensure our definitions only use the minimal
# (json_schema_type, webmethod) definitions from the llama_models package. For
# generation though, we need the full definitions and implementations from the
@@ -31,11 +27,13 @@ from .strong_typing.schema import json_schema_type
schema_utils.json_schema_type = json_schema_type
-# this line needs to be here to ensure json_schema_type has been altered before
-# the imports use the annotation
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
from llama_stack.distribution.stack import LlamaStack # noqa: E402
+from .pyopenapi.options import Options # noqa: E402
+from .pyopenapi.specification import Info, Server # noqa: E402
+from .pyopenapi.utility import Specification # noqa: E402
+
def main(output_dir: str):
output_dir = Path(output_dir)
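A minimal, self-contained sketch (not part of this diff) of the ordering constraint the moved imports preserve: a decorator runs at class-definition time, so the monkey-patch must land before any module that applies it is imported — which is why the `.pyopenapi` imports move below the patch and carry `# noqa: E402`. All names below are placeholders, not llama-stack APIs.

```python
# Generic monkey-patch-before-import pattern; names are illustrative only.
import types

registry = []

def full_decorator(cls):  # stands in for the full json_schema_type
    registry.append(cls.__name__)
    return cls

# A stub module exposing a minimal no-op decorator, as llama_models does.
stub_module = types.SimpleNamespace(json_schema_type=lambda cls: cls)

# 1. Patch the stub first...
stub_module.json_schema_type = full_decorator

# 2. ...then define (or import) anything that uses it. If this class body ran
# before the patch, it would be bound to the no-op stub forever.
@stub_module.json_schema_type
class Example:
    pass

assert registry == ["Example"]
```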
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 9a9a29439..cb7c6c3af 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1067,7 +1067,10 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SpanWithChildren"
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/SpanWithStatus"
+ }
}
}
}
@@ -1123,45 +1126,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
- }
- }
- }
- }
- },
- "tags": [
- "PostTraining (Coming Soon)"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/alpha/post-training/job/logs": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/PostTrainingJobLogStream"
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
+ },
+ {
+ "type": "null"
+ }
+ ]
}
}
}
@@ -1199,7 +1171,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+ },
+ {
+ "type": "null"
+ }
+ ]
}
}
}
@@ -5459,6 +5438,10 @@
"chunk_size_in_tokens": {
"type": "integer"
},
+ "embedding_dimension": {
+ "type": "integer",
+ "default": 384
+ },
"overlap_size_in_tokens": {
"type": "integer"
}
@@ -5807,6 +5790,10 @@
}
]
}
+ },
+ "model_type": {
+ "$ref": "#/components/schemas/ModelType",
+ "default": "llm"
}
},
"additionalProperties": false,
@@ -5815,7 +5802,15 @@
"provider_resource_id",
"provider_id",
"type",
- "metadata"
+ "metadata",
+ "model_type"
+ ]
+ },
+ "ModelType": {
+ "type": "string",
+ "enum": [
+ "llm",
+ "embedding"
]
},
"PaginatedRowsResult": {
@@ -6146,7 +6141,7 @@
"error"
]
},
- "SpanWithChildren": {
+ "SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
@@ -6194,12 +6189,6 @@
]
}
},
- "children": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/SpanWithChildren"
- }
- },
"status": {
"$ref": "#/components/schemas/SpanStatus"
}
@@ -6209,8 +6198,7 @@
"span_id",
"trace_id",
"name",
- "start_time",
- "children"
+ "start_time"
]
},
"Checkpoint": {
@@ -6236,31 +6224,11 @@
],
"title": "Artifacts of a finetuning job."
},
- "PostTrainingJobLogStream": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- },
- "log_lines": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid",
- "log_lines"
- ],
- "title": "Stream of logs from a finetuning job."
- },
- "PostTrainingJobStatus": {
+ "JobStatus": {
"type": "string",
"enum": [
- "running",
"completed",
+ "in_progress",
"failed",
"scheduled"
]
@@ -6272,7 +6240,7 @@
"type": "string"
},
"status": {
- "$ref": "#/components/schemas/PostTrainingJobStatus"
+ "$ref": "#/components/schemas/JobStatus"
},
"scheduled_at": {
"type": "string",
@@ -6456,13 +6424,6 @@
"job_id"
]
},
- "JobStatus": {
- "type": "string",
- "enum": [
- "completed",
- "in_progress"
- ]
- },
"ProviderInfo": {
"type": "object",
"properties": {
@@ -6796,39 +6757,89 @@
"gamma"
]
},
+ "DataConfig": {
+ "type": "object",
+ "properties": {
+ "dataset_id": {
+ "type": "string"
+ },
+ "batch_size": {
+ "type": "integer"
+ },
+ "shuffle": {
+ "type": "boolean"
+ },
+ "validation_dataset_id": {
+ "type": "string"
+ },
+ "packed": {
+ "type": "boolean",
+ "default": false
+ },
+ "train_on_input": {
+ "type": "boolean",
+ "default": false
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_id",
+ "batch_size",
+ "shuffle"
+ ]
+ },
+ "EfficiencyConfig": {
+ "type": "object",
+ "properties": {
+ "enable_activation_checkpointing": {
+ "type": "boolean",
+ "default": false
+ },
+ "enable_activation_offloading": {
+ "type": "boolean",
+ "default": false
+ },
+ "memory_efficient_fsdp_wrap": {
+ "type": "boolean",
+ "default": false
+ },
+ "fsdp_cpu_offload": {
+ "type": "boolean",
+ "default": false
+ }
+ },
+ "additionalProperties": false
+ },
"OptimizerConfig": {
"type": "object",
"properties": {
"optimizer_type": {
- "type": "string",
- "enum": [
- "adam",
- "adamw",
- "sgd"
- ]
+ "$ref": "#/components/schemas/OptimizerType"
},
"lr": {
"type": "number"
},
- "lr_min": {
- "type": "number"
- },
"weight_decay": {
"type": "number"
+ },
+ "num_warmup_steps": {
+ "type": "integer"
}
},
"additionalProperties": false,
"required": [
"optimizer_type",
"lr",
- "lr_min",
- "weight_decay"
+ "weight_decay",
+ "num_warmup_steps"
]
},
- "RLHFAlgorithm": {
+ "OptimizerType": {
"type": "string",
"enum": [
- "dpo"
+ "adam",
+ "adamw",
+ "sgd"
]
},
"TrainingConfig": {
@@ -6837,34 +6848,33 @@
"n_epochs": {
"type": "integer"
},
- "batch_size": {
+ "max_steps_per_epoch": {
"type": "integer"
},
- "shuffle": {
- "type": "boolean"
- },
- "n_iters": {
+ "gradient_accumulation_steps": {
"type": "integer"
},
- "enable_activation_checkpointing": {
- "type": "boolean"
+ "data_config": {
+ "$ref": "#/components/schemas/DataConfig"
},
- "memory_efficient_fsdp_wrap": {
- "type": "boolean"
+ "optimizer_config": {
+ "$ref": "#/components/schemas/OptimizerConfig"
},
- "fsdp_cpu_offload": {
- "type": "boolean"
+ "efficiency_config": {
+ "$ref": "#/components/schemas/EfficiencyConfig"
+ },
+ "dtype": {
+ "type": "string",
+ "default": "bf16"
}
},
"additionalProperties": false,
"required": [
"n_epochs",
- "batch_size",
- "shuffle",
- "n_iters",
- "enable_activation_checkpointing",
- "memory_efficient_fsdp_wrap",
- "fsdp_cpu_offload"
+ "max_steps_per_epoch",
+ "gradient_accumulation_steps",
+ "data_config",
+ "optimizer_config"
]
},
"PreferenceOptimizeRequest": {
@@ -6874,23 +6884,11 @@
"type": "string"
},
"finetuned_model": {
- "$ref": "#/components/schemas/URL"
- },
- "dataset_id": {
"type": "string"
},
- "validation_dataset_id": {
- "type": "string"
- },
- "algorithm": {
- "$ref": "#/components/schemas/RLHFAlgorithm"
- },
"algorithm_config": {
"$ref": "#/components/schemas/DPOAlignmentConfig"
},
- "optimizer_config": {
- "$ref": "#/components/schemas/OptimizerConfig"
- },
"training_config": {
"$ref": "#/components/schemas/TrainingConfig"
},
@@ -6949,11 +6947,7 @@
"required": [
"job_uuid",
"finetuned_model",
- "dataset_id",
- "validation_dataset_id",
- "algorithm",
"algorithm_config",
- "optimizer_config",
"training_config",
"hyperparam_search_config",
"logger_config"
@@ -7645,6 +7639,9 @@
}
]
}
+ },
+ "model_type": {
+ "$ref": "#/components/schemas/ModelType"
}
},
"additionalProperties": false,
@@ -8140,49 +8137,14 @@
"results"
]
},
- "DoraFinetuningConfig": {
- "type": "object",
- "properties": {
- "lora_attn_modules": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "apply_lora_to_mlp": {
- "type": "boolean"
- },
- "apply_lora_to_output": {
- "type": "boolean"
- },
- "rank": {
- "type": "integer"
- },
- "alpha": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "lora_attn_modules",
- "apply_lora_to_mlp",
- "apply_lora_to_output",
- "rank",
- "alpha"
- ]
- },
- "FinetuningAlgorithm": {
- "type": "string",
- "enum": [
- "full",
- "lora",
- "qlora",
- "dora"
- ]
- },
"LoraFinetuningConfig": {
"type": "object",
"properties": {
+ "type": {
+ "type": "string",
+ "const": "LoRA",
+ "default": "LoRA"
+ },
"lora_attn_modules": {
"type": "array",
"items": {
@@ -8200,10 +8162,19 @@
},
"alpha": {
"type": "integer"
+ },
+ "use_dora": {
+ "type": "boolean",
+ "default": false
+ },
+ "quantize_base": {
+ "type": "boolean",
+ "default": false
}
},
"additionalProperties": false,
"required": [
+ "type",
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
@@ -8211,35 +8182,26 @@
"alpha"
]
},
- "QLoraFinetuningConfig": {
+ "QATFinetuningConfig": {
"type": "object",
"properties": {
- "lora_attn_modules": {
- "type": "array",
- "items": {
- "type": "string"
- }
+ "type": {
+ "type": "string",
+ "const": "QAT",
+ "default": "QAT"
},
- "apply_lora_to_mlp": {
- "type": "boolean"
+ "quantizer_name": {
+ "type": "string"
},
- "apply_lora_to_output": {
- "type": "boolean"
- },
- "rank": {
- "type": "integer"
- },
- "alpha": {
+ "group_size": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
- "lora_attn_modules",
- "apply_lora_to_mlp",
- "apply_lora_to_output",
- "rank",
- "alpha"
+ "type",
+ "quantizer_name",
+ "group_size"
]
},
"SupervisedFineTuneRequest": {
@@ -8248,34 +8210,6 @@
"job_uuid": {
"type": "string"
},
- "model": {
- "type": "string"
- },
- "dataset_id": {
- "type": "string"
- },
- "validation_dataset_id": {
- "type": "string"
- },
- "algorithm": {
- "$ref": "#/components/schemas/FinetuningAlgorithm"
- },
- "algorithm_config": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/LoraFinetuningConfig"
- },
- {
- "$ref": "#/components/schemas/QLoraFinetuningConfig"
- },
- {
- "$ref": "#/components/schemas/DoraFinetuningConfig"
- }
- ]
- },
- "optimizer_config": {
- "$ref": "#/components/schemas/OptimizerConfig"
- },
"training_config": {
"$ref": "#/components/schemas/TrainingConfig"
},
@@ -8328,20 +8262,31 @@
}
]
}
+ },
+ "model": {
+ "type": "string"
+ },
+ "checkpoint_dir": {
+ "type": "string"
+ },
+ "algorithm_config": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LoraFinetuningConfig"
+ },
+ {
+ "$ref": "#/components/schemas/QATFinetuningConfig"
+ }
+ ]
}
},
"additionalProperties": false,
"required": [
"job_uuid",
- "model",
- "dataset_id",
- "validation_dataset_id",
- "algorithm",
- "algorithm_config",
- "optimizer_config",
"training_config",
"hyperparam_search_config",
- "logger_config"
+ "logger_config",
+ "model"
]
},
"SyntheticDataGenerateRequest": {
@@ -8658,6 +8603,10 @@
"name": "DPOAlignmentConfig",
"description": ""
},
+ {
+ "name": "DataConfig",
+ "description": ""
+ },
{
"name": "Dataset",
"description": ""
@@ -8677,8 +8626,8 @@
"description": ""
},
{
- "name": "DoraFinetuningConfig",
- "description": ""
+ "name": "EfficiencyConfig",
+ "description": ""
},
{
"name": "EmbeddingsRequest",
@@ -8706,10 +8655,6 @@
"name": "EvaluateRowsRequest",
"description": ""
},
- {
- "name": "FinetuningAlgorithm",
- "description": ""
- },
{
"name": "FunctionCallToolDefinition",
"description": ""
@@ -8826,6 +8771,10 @@
"name": "ModelCandidate",
"description": ""
},
+ {
+ "name": "ModelType",
+ "description": ""
+ },
{
"name": "Models"
},
@@ -8833,6 +8782,10 @@
"name": "OptimizerConfig",
"description": ""
},
+ {
+ "name": "OptimizerType",
+ "description": ""
+ },
{
"name": "PaginatedRowsResult",
"description": ""
@@ -8852,14 +8805,6 @@
"name": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n"
},
- {
- "name": "PostTrainingJobLogStream",
- "description": "Stream of logs from a finetuning job.\n\n"
- },
- {
- "name": "PostTrainingJobStatus",
- "description": ""
- },
{
"name": "PostTrainingJobStatusResponse",
"description": "Status of a finetuning job.\n\n"
@@ -8873,8 +8818,8 @@
"description": ""
},
{
- "name": "QLoraFinetuningConfig",
- "description": ""
+ "name": "QATFinetuningConfig",
+ "description": ""
},
{
"name": "QueryCondition",
@@ -8900,10 +8845,6 @@
"name": "QueryTracesRequest",
"description": ""
},
- {
- "name": "RLHFAlgorithm",
- "description": ""
- },
{
"name": "RegexParserScoringFnParams",
"description": ""
@@ -9041,8 +8982,8 @@
"description": ""
},
{
- "name": "SpanWithChildren",
- "description": ""
+ "name": "SpanWithStatus",
+ "description": ""
},
{
"name": "StopReason",
@@ -9237,16 +9178,16 @@
"CreateAgentSessionRequest",
"CreateAgentTurnRequest",
"DPOAlignmentConfig",
+ "DataConfig",
"Dataset",
"DeleteAgentsRequest",
"DeleteAgentsSessionRequest",
- "DoraFinetuningConfig",
+ "EfficiencyConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
"EvalTask",
"EvaluateResponse",
"EvaluateRowsRequest",
- "FinetuningAlgorithm",
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
"GetSpanTreeRequest",
@@ -9273,24 +9214,23 @@
"MetricEvent",
"Model",
"ModelCandidate",
+ "ModelType",
"OptimizerConfig",
+ "OptimizerType",
"PaginatedRowsResult",
"PhotogenToolDefinition",
"PostTrainingJob",
"PostTrainingJobArtifactsResponse",
- "PostTrainingJobLogStream",
- "PostTrainingJobStatus",
"PostTrainingJobStatusResponse",
"PreferenceOptimizeRequest",
"ProviderInfo",
- "QLoraFinetuningConfig",
+ "QATFinetuningConfig",
"QueryCondition",
"QueryConditionOp",
"QueryDocumentsRequest",
"QueryDocumentsResponse",
"QuerySpansRequest",
"QueryTracesRequest",
- "RLHFAlgorithm",
"RegexParserScoringFnParams",
"RegisterDatasetRequest",
"RegisterEvalTaskRequest",
@@ -9322,7 +9262,7 @@
"SpanEndPayload",
"SpanStartPayload",
"SpanStatus",
- "SpanWithChildren",
+ "SpanWithStatus",
"StopReason",
"StructuredLogEvent",
"SupervisedFineTuneRequest",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index a1cd08387..d20c623b3 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -761,6 +761,28 @@ components:
- epsilon
- gamma
type: object
+ DataConfig:
+ additionalProperties: false
+ properties:
+ batch_size:
+ type: integer
+ dataset_id:
+ type: string
+ packed:
+ default: false
+ type: boolean
+ shuffle:
+ type: boolean
+ train_on_input:
+ default: false
+ type: boolean
+ validation_dataset_id:
+ type: string
+ required:
+ - dataset_id
+ - batch_size
+ - shuffle
+ type: object
Dataset:
additionalProperties: false
properties:
@@ -908,27 +930,21 @@ components:
- agent_id
- session_id
type: object
- DoraFinetuningConfig:
+ EfficiencyConfig:
additionalProperties: false
properties:
- alpha:
- type: integer
- apply_lora_to_mlp:
+ enable_activation_checkpointing:
+ default: false
type: boolean
- apply_lora_to_output:
+ enable_activation_offloading:
+ default: false
+ type: boolean
+ fsdp_cpu_offload:
+ default: false
+ type: boolean
+ memory_efficient_fsdp_wrap:
+ default: false
type: boolean
- lora_attn_modules:
- items:
- type: string
- type: array
- rank:
- type: integer
- required:
- - lora_attn_modules
- - apply_lora_to_mlp
- - apply_lora_to_output
- - rank
- - alpha
type: object
EmbeddingsRequest:
additionalProperties: false
@@ -1054,13 +1070,6 @@ components:
- scoring_functions
- task_config
type: object
- FinetuningAlgorithm:
- enum:
- - full
- - lora
- - qlora
- - dora
- type: string
FunctionCallToolDefinition:
additionalProperties: false
properties:
@@ -1230,6 +1239,8 @@ components:
enum:
- completed
- in_progress
+ - failed
+ - scheduled
type: string
KeyValueMemoryBank:
additionalProperties: false
@@ -1358,9 +1369,20 @@ components:
items:
type: string
type: array
+ quantize_base:
+ default: false
+ type: boolean
rank:
type: integer
+ type:
+ const: LoRA
+ default: LoRA
+ type: string
+ use_dora:
+ default: false
+ type: boolean
required:
+ - type
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
@@ -1621,6 +1643,9 @@ components:
- type: array
- type: object
type: object
+ model_type:
+ $ref: '#/components/schemas/ModelType'
+ default: llm
provider_id:
type: string
provider_resource_id:
@@ -1635,6 +1660,7 @@ components:
- provider_id
- type
- metadata
+ - model_type
type: object
ModelCandidate:
additionalProperties: false
@@ -1654,27 +1680,34 @@ components:
- model
- sampling_params
type: object
+ ModelType:
+ enum:
+ - llm
+ - embedding
+ type: string
OptimizerConfig:
additionalProperties: false
properties:
lr:
type: number
- lr_min:
- type: number
+ num_warmup_steps:
+ type: integer
optimizer_type:
- enum:
- - adam
- - adamw
- - sgd
- type: string
+ $ref: '#/components/schemas/OptimizerType'
weight_decay:
type: number
required:
- optimizer_type
- lr
- - lr_min
- weight_decay
+ - num_warmup_steps
type: object
+ OptimizerType:
+ enum:
+ - adam
+ - adamw
+ - sgd
+ type: string
PaginatedRowsResult:
additionalProperties: false
properties:
@@ -1740,27 +1773,6 @@ components:
- checkpoints
title: Artifacts of a finetuning job.
type: object
- PostTrainingJobLogStream:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- log_lines:
- items:
- type: string
- type: array
- required:
- - job_uuid
- - log_lines
- title: Stream of logs from a finetuning job.
- type: object
- PostTrainingJobStatus:
- enum:
- - running
- - completed
- - failed
- - scheduled
- type: string
PostTrainingJobStatusResponse:
additionalProperties: false
properties:
@@ -1790,7 +1802,7 @@ components:
format: date-time
type: string
status:
- $ref: '#/components/schemas/PostTrainingJobStatus'
+ $ref: '#/components/schemas/JobStatus'
required:
- job_uuid
- status
@@ -1800,14 +1812,10 @@ components:
PreferenceOptimizeRequest:
additionalProperties: false
properties:
- algorithm:
- $ref: '#/components/schemas/RLHFAlgorithm'
algorithm_config:
$ref: '#/components/schemas/DPOAlignmentConfig'
- dataset_id:
- type: string
finetuned_model:
- $ref: '#/components/schemas/URL'
+ type: string
hyperparam_search_config:
additionalProperties:
oneOf:
@@ -1830,20 +1838,12 @@ components:
- type: array
- type: object
type: object
- optimizer_config:
- $ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
- validation_dataset_id:
- type: string
required:
- job_uuid
- finetuned_model
- - dataset_id
- - validation_dataset_id
- - algorithm
- algorithm_config
- - optimizer_config
- training_config
- hyperparam_search_config
- logger_config
@@ -1859,27 +1859,21 @@ components:
- provider_id
- provider_type
type: object
- QLoraFinetuningConfig:
+ QATFinetuningConfig:
additionalProperties: false
properties:
- alpha:
- type: integer
- apply_lora_to_mlp:
- type: boolean
- apply_lora_to_output:
- type: boolean
- lora_attn_modules:
- items:
- type: string
- type: array
- rank:
+ group_size:
type: integer
+ quantizer_name:
+ type: string
+ type:
+ const: QAT
+ default: QAT
+ type: string
required:
- - lora_attn_modules
- - apply_lora_to_mlp
- - apply_lora_to_output
- - rank
- - alpha
+ - type
+ - quantizer_name
+ - group_size
type: object
QueryCondition:
additionalProperties: false
@@ -2003,10 +1997,6 @@ components:
type: string
type: array
type: object
- RLHFAlgorithm:
- enum:
- - dpo
- type: string
RegexParserScoringFnParams:
additionalProperties: false
properties:
@@ -2209,6 +2199,8 @@ components:
type: object
model_id:
type: string
+ model_type:
+ $ref: '#/components/schemas/ModelType'
provider_id:
type: string
provider_model_id:
@@ -2941,7 +2933,7 @@ components:
- ok
- error
type: string
- SpanWithChildren:
+ SpanWithStatus:
additionalProperties: false
properties:
attributes:
@@ -2954,10 +2946,6 @@ components:
- type: array
- type: object
type: object
- children:
- items:
- $ref: '#/components/schemas/SpanWithChildren'
- type: array
end_time:
format: date-time
type: string
@@ -2979,7 +2967,6 @@ components:
- trace_id
- name
- start_time
- - children
type: object
StopReason:
enum:
@@ -3025,14 +3012,11 @@ components:
SupervisedFineTuneRequest:
additionalProperties: false
properties:
- algorithm:
- $ref: '#/components/schemas/FinetuningAlgorithm'
algorithm_config:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- - $ref: '#/components/schemas/QLoraFinetuningConfig'
- - $ref: '#/components/schemas/DoraFinetuningConfig'
- dataset_id:
+ - $ref: '#/components/schemas/QATFinetuningConfig'
+ checkpoint_dir:
type: string
hyperparam_search_config:
additionalProperties:
@@ -3058,23 +3042,14 @@ components:
type: object
model:
type: string
- optimizer_config:
- $ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
- validation_dataset_id:
- type: string
required:
- job_uuid
- - model
- - dataset_id
- - validation_dataset_id
- - algorithm
- - algorithm_config
- - optimizer_config
- training_config
- hyperparam_search_config
- logger_config
+ - model
type: object
SyntheticDataGenerateRequest:
additionalProperties: false
@@ -3384,28 +3359,27 @@ components:
TrainingConfig:
additionalProperties: false
properties:
- batch_size:
+ data_config:
+ $ref: '#/components/schemas/DataConfig'
+ dtype:
+ default: bf16
+ type: string
+ efficiency_config:
+ $ref: '#/components/schemas/EfficiencyConfig'
+ gradient_accumulation_steps:
+ type: integer
+ max_steps_per_epoch:
type: integer
- enable_activation_checkpointing:
- type: boolean
- fsdp_cpu_offload:
- type: boolean
- memory_efficient_fsdp_wrap:
- type: boolean
n_epochs:
type: integer
- n_iters:
- type: integer
- shuffle:
- type: boolean
+ optimizer_config:
+ $ref: '#/components/schemas/OptimizerConfig'
required:
- n_epochs
- - batch_size
- - shuffle
- - n_iters
- - enable_activation_checkpointing
- - memory_efficient_fsdp_wrap
- - fsdp_cpu_offload
+ - max_steps_per_epoch
+ - gradient_accumulation_steps
+ - data_config
+ - optimizer_config
type: object
Turn:
additionalProperties: false
@@ -3548,6 +3522,9 @@ components:
properties:
chunk_size_in_tokens:
type: integer
+ embedding_dimension:
+ default: 384
+ type: integer
embedding_model:
type: string
identifier:
@@ -4601,7 +4578,9 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+ oneOf:
+ - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+ - type: 'null'
description: OK
tags:
- PostTraining (Coming Soon)
@@ -4626,30 +4605,6 @@ paths:
description: OK
tags:
- PostTraining (Coming Soon)
- /alpha/post-training/job/logs:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingJobLogStream'
- description: OK
- tags:
- - PostTraining (Coming Soon)
/alpha/post-training/job/status:
get:
parameters:
@@ -4670,7 +4625,9 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+ oneOf:
+ - $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+ - type: 'null'
description: OK
tags:
- PostTraining (Coming Soon)
@@ -5054,7 +5011,9 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/SpanWithChildren'
+ additionalProperties:
+ $ref: '#/components/schemas/SpanWithStatus'
+ type: object
description: OK
tags:
- Telemetry
@@ -5290,6 +5249,8 @@ tags:
- description:
name: DPOAlignmentConfig
+- description:
+ name: DataConfig
- description:
name: Dataset
- name: DatasetIO
@@ -5300,9 +5261,9 @@ tags:
- description:
name: DeleteAgentsSessionRequest
-- description:
-  name: DoraFinetuningConfig
+- description:
+  name: EfficiencyConfig
- description:
name: EmbeddingsRequest
@@ -5319,9 +5280,6 @@ tags:
- description:
name: EvaluateRowsRequest
-- description:
- name: FinetuningAlgorithm
- description:
name: FunctionCallToolDefinition
@@ -5395,10 +5353,14 @@ tags:
name: Model
- description:
name: ModelCandidate
+- description:
+ name: ModelType
- name: Models
- description:
name: OptimizerConfig
+- description:
+ name: OptimizerType
- description:
name: PaginatedRowsResult
@@ -5415,14 +5377,6 @@ tags:
'
name: PostTrainingJobArtifactsResponse
-- description: 'Stream of logs from a finetuning job.
-
-
- '
- name: PostTrainingJobLogStream
-- description:
- name: PostTrainingJobStatus
- description: 'Status of a finetuning job.
@@ -5434,9 +5388,9 @@ tags:
name: PreferenceOptimizeRequest
- description:
name: ProviderInfo
-- description:
-  name: QLoraFinetuningConfig
+- description:
+  name: QATFinetuningConfig
- description:
name: QueryCondition
@@ ... @@ tags:
- description:
name: QueryTracesRequest
-- description:
- name: RLHFAlgorithm
- description:
name: RegexParserScoringFnParams
@@ -5545,9 +5497,8 @@ tags:
name: SpanStartPayload
- description:
name: SpanStatus
-- description:
- name: SpanWithChildren
+- description:
+ name: SpanWithStatus
- description:
name: StopReason
- description:
  name: StructuredLogEvent
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ ... @@
     async def get_span_tree(
         self,
         span_id: str,
         attributes_to_return: Optional[List[str]] = None,
         max_depth: Optional[int] = None,
-    ) -> SpanWithChildren: ...
+    ) -> Dict[str, SpanWithStatus]: ...
@webmethod(route="/telemetry/query-spans", method="POST")
async def query_spans(
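Since `get_span_tree` now returns a flat `Dict[str, SpanWithStatus]` keyed by span id rather than a nested `SpanWithChildren`, callers that still want a tree can rebuild one from `parent_span_id`. A hypothetical helper, not part of this PR:

```python
# Regroup the flat span mapping into per-parent children lists — the structure
# the removed SpanWithChildren.children field used to encode. Values are
# SpanWithStatus instances; typed as Any to keep the sketch self-contained.
from collections import defaultdict
from typing import Any, Dict, List

def group_children(spans_by_id: Dict[str, Any]) -> Dict[str, List[Any]]:
    children: Dict[str, List[Any]] = defaultdict(list)
    for span in spans_by_id.values():
        if span.parent_span_id is not None:
            children[span.parent_span_id].append(span)
    return children
```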
diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
index 2e4a778e4..d7229f508 100644
--- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
@@ -243,7 +243,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
span_id: str,
attributes_to_return: Optional[List[str]] = None,
max_depth: Optional[int] = None,
- ) -> SpanWithChildren:
+ ) -> Dict[str, SpanWithStatus]:
return await self.trace_store.get_span_tree(
span_id=span_id,
attributes_to_return=attributes_to_return,
diff --git a/llama_stack/providers/utils/telemetry/dataset_mixin.py b/llama_stack/providers/utils/telemetry/dataset_mixin.py
index 7a59801f4..bf5e79c3d 100644
--- a/llama_stack/providers/utils/telemetry/dataset_mixin.py
+++ b/llama_stack/providers/utils/telemetry/dataset_mixin.py
@@ -7,7 +7,7 @@
from typing import List, Optional
from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.telemetry import QueryCondition, Span, SpanWithChildren
+from llama_stack.apis.telemetry import QueryCondition, Span
class TelemetryDatasetMixin:
@@ -53,19 +53,18 @@ class TelemetryDatasetMixin:
spans = []
for trace in traces:
- span_tree = await self.get_span_tree(
+ spans_by_id = await self.get_span_tree(
span_id=trace.root_span_id,
attributes_to_return=attributes_to_return,
max_depth=max_depth,
)
- def extract_spans(span: SpanWithChildren) -> List[Span]:
- result = []
+ for span in spans_by_id.values():
if span.attributes and all(
attr in span.attributes and span.attributes[attr] is not None
for attr in attributes_to_return
):
- result.append(
+ spans.append(
Span(
trace_id=trace.root_span_id,
span_id=span.span_id,
@@ -77,11 +76,4 @@ class TelemetryDatasetMixin:
)
)
- for child in span.children:
- result.extend(extract_spans(child))
-
- return result
-
- spans.extend(extract_spans(span_tree))
-
return spans
diff --git a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
index 8d9035216..b0c3f7868 100644
--- a/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
+++ b/llama_stack/providers/utils/telemetry/sqlite_trace_store.py
@@ -6,11 +6,11 @@
import json
from datetime import datetime
-from typing import List, Optional, Protocol
+from typing import Dict, List, Optional, Protocol
import aiosqlite
-from llama_stack.apis.telemetry import QueryCondition, SpanWithChildren, Trace
+from llama_stack.apis.telemetry import QueryCondition, SpanWithStatus, Trace
class TraceStore(Protocol):
@@ -27,7 +27,7 @@ class TraceStore(Protocol):
span_id: str,
attributes_to_return: Optional[List[str]] = None,
max_depth: Optional[int] = None,
- ) -> SpanWithChildren: ...
+ ) -> Dict[str, SpanWithStatus]: ...
class SQLiteTraceStore(TraceStore):
@@ -114,7 +114,7 @@ class SQLiteTraceStore(TraceStore):
span_id: str,
attributes_to_return: Optional[List[str]] = None,
max_depth: Optional[int] = None,
- ) -> SpanWithChildren:
+ ) -> Dict[str, SpanWithStatus]:
# Build the attributes selection
attributes_select = "s.attributes"
if attributes_to_return:
@@ -143,6 +143,7 @@ class SQLiteTraceStore(TraceStore):
ORDER BY depth, start_time
"""
+ spans_by_id = {}
async with aiosqlite.connect(self.conn_string) as conn:
conn.row_factory = aiosqlite.Row
async with conn.execute(query, (span_id, max_depth, max_depth)) as cursor:
@@ -151,12 +152,8 @@ class SQLiteTraceStore(TraceStore):
if not rows:
raise ValueError(f"Span {span_id} not found")
- # Build span tree
- spans_by_id = {}
- root_span = None
-
for row in rows:
- span = SpanWithChildren(
+ span = SpanWithStatus(
span_id=row["span_id"],
trace_id=row["trace_id"],
parent_span_id=row["parent_span_id"],
@@ -165,14 +162,8 @@ class SQLiteTraceStore(TraceStore):
end_time=datetime.fromisoformat(row["end_time"]),
attributes=json.loads(row["filtered_attributes"]),
status=row["status"].lower(),
- children=[],
)
spans_by_id[span.span_id] = span
- if span.span_id == span_id:
- root_span = span
- elif span.parent_span_id in spans_by_id:
- spans_by_id[span.parent_span_id].children.append(span)
-
- return root_span
+ return spans_by_id
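A usage sketch under the new return type. The `conn_string` constructor argument is assumed from the `self.conn_string` attribute used above; the rest follows the signatures in this diff.

```python
# Hedged usage sketch: the requested span is no longer a special root object —
# it is looked up by id like any other entry, and a missing span raises
# ValueError (see the check above).
import asyncio

from llama_stack.providers.utils.telemetry.sqlite_trace_store import SQLiteTraceStore

async def main() -> None:
    store = SQLiteTraceStore(conn_string="traces.db")  # constructor arg assumed
    spans_by_id = await store.get_span_tree(span_id="root-span-id", max_depth=2)
    root = spans_by_id["root-span-id"]
    print(root.name, root.status)

asyncio.run(main())
```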
diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py
index 938d333fa..67054da90 100644
--- a/llama_stack/providers/utils/telemetry/trace_protocol.py
+++ b/llama_stack/providers/utils/telemetry/trace_protocol.py
@@ -41,8 +41,6 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
"""
def trace_method(method: Callable) -> Callable:
- from llama_stack.providers.utils.telemetry import tracing
-
is_async = asyncio.iscoroutinefunction(method)
is_async_gen = inspect.isasyncgenfunction(method)
@@ -77,6 +75,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
async def async_gen_wrapper(
self: Any, *args: Any, **kwargs: Any
) -> AsyncGenerator:
+ from llama_stack.providers.utils.telemetry import tracing
+
class_name, method_name, span_attributes = create_span_context(
self, *args, **kwargs
)
@@ -92,6 +92,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
@wraps(method)
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+ from llama_stack.providers.utils.telemetry import tracing
+
class_name, method_name, span_attributes = create_span_context(
self, *args, **kwargs
)
@@ -107,6 +109,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
@wraps(method)
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+ from llama_stack.providers.utils.telemetry import tracing
+
class_name, method_name, span_attributes = create_span_context(
self, *args, **kwargs
)