diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 0f223b51b..fd7d767ae 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -909,59 +909,6 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/evaluations": { - "post": { - "responses": { - "200": { - "description": "EvaluateResponse object containing generations and scores", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Evaluate a list of rows on a benchmark.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateRowsRequest" - } - } - }, - "required": true - } - } - }, "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}": { "get": { "responses": { @@ -1396,48 +1343,6 @@ ] } }, - "/v1/scoring-functions/{scoring_fn_id}": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoringFn" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [ - { - "name": "scoring_fn_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/shields/{identifier}": { "get": { "responses": { @@ -2372,153 +2277,6 @@ ] } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { - "get": { - "responses": { - "200": { - "description": "The status of the evaluationjob.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobStatus" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the status of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the status of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Cancel 
a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to cancel.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { - "get": { - "responses": { - "200": { - "description": "The result of the job.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the result of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the result of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/agents/{agent_id}/sessions": { "get": { "responses": { @@ -3050,73 +2808,6 @@ ] } }, - "/v1/scoring-functions": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListScoringFunctionsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [] - }, - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterScoringFunctionRequest" - } - } - }, - "required": true - } - } - }, "/v1/shields": { "get": { "responses": { @@ -3744,59 +3435,6 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs": { - "post": { - "responses": { - "200": { - "description": "The job that was created to run the evaluation.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Job" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Run an evaluation on a benchmark.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - 
"content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RunEvalRequest" - } - } - }, - "required": true - } - } - }, "/v1/safety/run-shield": { "post": { "responses": { @@ -3919,92 +3557,6 @@ } } }, - "/v1/scoring/score": { - "post": { - "responses": { - "200": { - "description": "ScoreResponse object containing rows and aggregated results", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Scoring" - ], - "description": "Score a list of rows.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreRequest" - } - } - }, - "required": true - } - } - }, - "/v1/scoring/score-batch": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreBatchResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Scoring" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreBatchRequest" - } - } - }, - "required": true - } - } - }, "/v1/post-training/supervised-fine-tune": { "post": { "responses": { @@ -6630,381 +6182,6 @@ "title": "EmbeddingsResponse", "description": "Response containing generated embeddings." }, - "AgentCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent", - "default": "agent" - }, - "config": { - "$ref": "#/components/schemas/AgentConfig", - "description": "The configuration for the agent candidate." - } - }, - "additionalProperties": false, - "required": [ - "type", - "config" - ], - "title": "AgentCandidate", - "description": "An agent candidate for evaluation." - }, - "AggregationFunctionType": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType" - }, - "BasicScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "basic", - "default": "basic" - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BasicScoringFnParams" - }, - "BenchmarkConfig": { - "type": "object", - "properties": { - "eval_candidate": { - "$ref": "#/components/schemas/EvalCandidate", - "description": "The candidate to evaluate." - }, - "scoring_params": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringFnParams" - }, - "description": "Map between scoring function id and parameters for each scoring function you want to run" - }, - "num_examples": { - "type": "integer", - "description": "(Optional) The number of examples to evaluate. 
If not provided, all examples in the dataset will be evaluated" - } - }, - "additionalProperties": false, - "required": [ - "eval_candidate", - "scoring_params" - ], - "title": "BenchmarkConfig", - "description": "A benchmark configuration for evaluation." - }, - "EvalCandidate": { - "oneOf": [ - { - "$ref": "#/components/schemas/ModelCandidate" - }, - { - "$ref": "#/components/schemas/AgentCandidate" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "model": "#/components/schemas/ModelCandidate", - "agent": "#/components/schemas/AgentCandidate" - } - } - }, - "LLMAsJudgeScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm_as_judge", - "default": "llm_as_judge" - }, - "judge_model": { - "type": "string" - }, - "prompt_template": { - "type": "string" - }, - "judge_score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type", - "judge_model" - ], - "title": "LLMAsJudgeScoringFnParams" - }, - "ModelCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "model", - "default": "model" - }, - "model": { - "type": "string", - "description": "The model ID to evaluate." - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams", - "description": "The sampling parameters for the model." - }, - "system_message": { - "$ref": "#/components/schemas/SystemMessage", - "description": "(Optional) The system message providing instructions or context to the model." - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "sampling_params" - ], - "title": "ModelCandidate", - "description": "A model candidate for evaluation." - }, - "RegexParserScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "regex_parser", - "default": "regex_parser" - }, - "parsing_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "RegexParserScoringFnParams" - }, - "ScoringFnParams": { - "oneOf": [ - { - "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" - }, - { - "$ref": "#/components/schemas/RegexParserScoringFnParams" - }, - { - "$ref": "#/components/schemas/BasicScoringFnParams" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", - "regex_parser": "#/components/schemas/RegexParserScoringFnParams", - "basic": "#/components/schemas/BasicScoringFnParams" - } - } - }, - "EvaluateRowsRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to evaluate." - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The scoring functions to use for the evaluation." 
- }, - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions", - "benchmark_config" - ], - "title": "EvaluateRowsRequest" - }, - "EvaluateResponse": { - "type": "object", - "properties": { - "generations": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The generations from the evaluation." - }, - "scores": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "The scores from the evaluation." - } - }, - "additionalProperties": false, - "required": [ - "generations", - "scores" - ], - "title": "EvaluateResponse", - "description": "The response from an evaluation." - }, - "ScoringResult": { - "type": "object", - "properties": { - "score_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The scoring result for each row. Each row is a map of column name to value." - }, - "aggregated_results": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Map of metric name to aggregated value" - } - }, - "additionalProperties": false, - "required": [ - "score_rows", - "aggregated_results" - ], - "title": "ScoringResult", - "description": "A scoring result for a single row." 
- }, "Agent": { "type": "object", "properties": { @@ -7732,268 +6909,6 @@ ], "title": "ModelType" }, - "AgentTurnInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent_turn_input", - "default": "agent_turn_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "AgentTurnInputType" - }, - "ArrayType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "array", - "default": "array" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ArrayType" - }, - "BooleanType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "boolean", - "default": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BooleanType" - }, - "ChatCompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "chat_completion_input", - "default": "chat_completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ChatCompletionInputType" - }, - "CompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "completion_input", - "default": "completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "CompletionInputType" - }, - "JsonType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "json", - "default": "json" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "JsonType" - }, - "NumberType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "number", - "default": "number" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "NumberType" - }, - "ObjectType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "object", - "default": "object" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ObjectType" - }, - "ParamType": { - "oneOf": [ - { - "$ref": "#/components/schemas/StringType" - }, - { - "$ref": "#/components/schemas/NumberType" - }, - { - "$ref": "#/components/schemas/BooleanType" - }, - { - "$ref": "#/components/schemas/ArrayType" - }, - { - "$ref": "#/components/schemas/ObjectType" - }, - { - "$ref": "#/components/schemas/JsonType" - }, - { - "$ref": "#/components/schemas/UnionType" - }, - { - "$ref": "#/components/schemas/ChatCompletionInputType" - }, - { - "$ref": "#/components/schemas/CompletionInputType" - }, - { - "$ref": "#/components/schemas/AgentTurnInputType" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "string": "#/components/schemas/StringType", - "number": "#/components/schemas/NumberType", - "boolean": "#/components/schemas/BooleanType", - "array": "#/components/schemas/ArrayType", - "object": "#/components/schemas/ObjectType", - "json": "#/components/schemas/JsonType", - "union": "#/components/schemas/UnionType", - "chat_completion_input": "#/components/schemas/ChatCompletionInputType", - "completion_input": "#/components/schemas/CompletionInputType", - "agent_turn_input": "#/components/schemas/AgentTurnInputType" - } - } - }, - "ScoringFn": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "scoring_function", - "default": 
"scoring_function" - }, - "description": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "metadata", - "return_type" - ], - "title": "ScoringFn" - }, - "StringType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "string", - "default": "string" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "StringType" - }, - "UnionType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "union", - "default": "union" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "UnionType" - }, "Shield": { "type": "object", "properties": { @@ -8564,6 +7479,26 @@ ], "title": "GradeRequest" }, + "AgentCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent", + "default": "agent" + }, + "agent_config": { + "$ref": "#/components/schemas/AgentConfig" + } + }, + "additionalProperties": false, + "required": [ + "type", + "agent_config" + ], + "title": "AgentCandidate", + "description": "An agent candidate for evaluation." + }, "EvaluationCandidate": { "oneOf": [ { @@ -8636,6 +7571,35 @@ ], "title": "EvaluationJob" }, + "ModelCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "model", + "default": "model" + }, + "model_id": { + "type": "string" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams", + "description": "The sampling parameters for the model." + }, + "system_message": { + "$ref": "#/components/schemas/SystemMessage", + "description": "(Optional) The system message providing instructions or context to the model." + } + }, + "additionalProperties": false, + "required": [ + "type", + "model_id", + "sampling_params" + ], + "title": "ModelCandidate", + "description": "A model candidate for evaluation." + }, "GradeSyncRequest": { "type": "object", "properties": { @@ -8698,6 +7662,73 @@ "title": "EvaluationResponse", "description": "A response to an inline evaluation." }, + "ScoringResult": { + "type": "object", + "properties": { + "scores": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The scoring result for each row. Each row is a map of grader column name to value." + }, + "metrics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Map of metric name to aggregated value." + } + }, + "additionalProperties": false, + "required": [ + "scores", + "metrics" + ], + "title": "ScoringResult", + "description": "A scoring result for a single row." 
+ }, "HealthInfo": { "type": "object", "properties": { @@ -9030,17 +8061,6 @@ "title": "IterrowsResponse", "description": "A paginated list of rows from a dataset." }, - "JobStatus": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "title": "JobStatus" - }, "ListAgentSessionsResponse": { "type": "object", "properties": { @@ -9301,22 +8321,6 @@ ], "title": "ListRoutesResponse" }, - "ListScoringFunctionsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoringFn" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListScoringFunctionsResponse" - }, "ListShieldsResponse": { "type": "object", "properties": { @@ -10501,36 +9505,6 @@ ], "title": "RegisterModelRequest" }, - "RegisterScoringFunctionRequest": { - "type": "object", - "properties": { - "scoring_fn_id": { - "type": "string" - }, - "description": { - "type": "string" - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "provider_scoring_fn_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "scoring_fn_id", - "description", - "return_type" - ], - "title": "RegisterScoringFunctionRequest" - }, "RegisterShieldRequest": { "type": "object", "properties": { @@ -10686,33 +9660,6 @@ ], "title": "RunRequest" }, - "RunEvalRequest": { - "type": "object", - "properties": { - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "benchmark_config" - ], - "title": "RunEvalRequest" - }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_id" - ], - "title": "Job" - }, "RunShieldRequest": { "type": "object", "properties": { @@ -10818,128 +9765,6 @@ ], "title": "SaveSpansToDatasetRequest" }, - "ScoreRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions" - ], - "title": "ScoreRequest" - }, - "ScoreResponse": { - "type": "object", - "properties": { - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult." - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreResponse", - "description": "The response from scoring." 
- }, - "ScoreBatchRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - } - }, - "save_results_dataset": { - "type": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "scoring_functions", - "save_results_dataset" - ], - "title": "ScoreBatchRequest" - }, - "ScoreBatchResponse": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - } - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreBatchResponse" - }, "LoraFinetuningConfig": { "type": "object", "properties": { @@ -11292,10 +10117,6 @@ { "name": "Datasets" }, - { - "name": "Eval", - "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." - }, { "name": "Evaluation" }, @@ -11326,12 +10147,6 @@ { "name": "Safety" }, - { - "name": "Scoring" - }, - { - "name": "ScoringFunctions" - }, { "name": "Shields" }, @@ -11363,7 +10178,6 @@ "Benchmarks", "DatasetIO", "Datasets", - "Eval", "Evaluation", "Files", "Graders", @@ -11373,8 +10187,6 @@ "PostTraining (Coming Soon)", "Providers", "Safety", - "Scoring", - "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", "Telemetry", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 7c4ea81b8..402106208 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -622,43 +622,6 @@ paths: schema: $ref: '#/components/schemas/EmbeddingsRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/evaluations: - post: - responses: - '200': - description: >- - EvaluateResponse object containing generations and scores - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Evaluate a list of rows on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. 
- required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateRowsRequest' - required: true /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: get: responses: @@ -956,34 +919,6 @@ paths: required: true schema: type: string - /v1/scoring-functions/{scoring_fn_id}: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ScoringFn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: - - name: scoring_fn_id - in: path - required: true - schema: - type: string /v1/shields/{identifier}: get: responses: @@ -1627,109 +1562,6 @@ paths: required: false schema: type: integer - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: - get: - responses: - '200': - description: The status of the evaluationjob. - content: - application/json: - schema: - $ref: '#/components/schemas/JobStatus' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the status of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Cancel a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: - get: - responses: - '200': - description: The result of the job. - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the result of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. 
- required: true - schema: - type: string /v1/agents/{agent_id}/sessions: get: responses: @@ -2098,53 +1930,6 @@ paths: required: false schema: $ref: '#/components/schemas/URL' - /v1/scoring-functions: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ListScoringFunctionsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: [] - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true /v1/shields: get: responses: @@ -2581,43 +2366,6 @@ paths: schema: $ref: '#/components/schemas/RunRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/jobs: - post: - responses: - '200': - description: >- - The job that was created to run the evaluation. - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Run an evaluation on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RunEvalRequest' - required: true /v1/safety/run-shield: post: responses: @@ -2704,65 +2452,6 @@ paths: schema: $ref: '#/components/schemas/SaveSpansToDatasetRequest' required: true - /v1/scoring/score: - post: - responses: - '200': - description: >- - ScoreResponse object containing rows and aggregated results - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Scoring - description: Score a list of rows. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreRequest' - required: true - /v1/scoring/score-batch: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreBatchResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Scoring - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreBatchRequest' - required: true /v1/post-training/supervised-fine-tune: post: responses: @@ -4652,251 +4341,6 @@ components: title: EmbeddingsResponse description: >- Response containing generated embeddings. - AgentCandidate: - type: object - properties: - type: - type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' - description: >- - The configuration for the agent candidate. - additionalProperties: false - required: - - type - - config - title: AgentCandidate - description: An agent candidate for evaluation. - AggregationFunctionType: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - BasicScoringFnParams: - type: object - properties: - type: - type: string - const: basic - default: basic - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - title: BasicScoringFnParams - BenchmarkConfig: - type: object - properties: - eval_candidate: - $ref: '#/components/schemas/EvalCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: - type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. - EvalCandidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - type: string - const: llm_as_judge - default: llm_as_judge - judge_model: - type: string - prompt_template: - type: string - judge_score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - - judge_model - title: LLMAsJudgeScoringFnParams - ModelCandidate: - type: object - properties: - type: - type: string - const: model - default: model - model: - type: string - description: The model ID to evaluate. - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. 
- system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - description: A model candidate for evaluation. - RegexParserScoringFnParams: - type: object - properties: - type: - type: string - const: regex_parser - default: regex_parser - parsing_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - title: RegexParserScoringFnParams - ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' - EvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the evaluation. - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - input_rows - - scoring_functions - - benchmark_config - title: EvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - description: The response from an evaluation. - ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - description: A scoring result for a single row. 
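(For reference: the ScoringResult schema removed above, with its score_rows and aggregated_results fields, is superseded later in this diff by a ScoringResult defined under the new Evaluation API whose fields are named scores and metrics. Below is a minimal Python sketch of mapping a payload from the old shape to the new one; the helper name and the example row/metric keys are made up for illustration, but both field sets are taken verbatim from this diff.)

from typing import Any, Dict


def migrate_scoring_result(old: Dict[str, Any]) -> Dict[str, Any]:
    # Old shape (removed above):  {"score_rows": [...], "aggregated_results": {...}}
    # New shape (added below):    {"scores": [...],     "metrics": {...}}
    return {
        "scores": old["score_rows"],
        "metrics": old["aggregated_results"],
    }


old_result = {
    "score_rows": [{"answer_correctness": 1.0}],  # hypothetical grader column
    "aggregated_results": {"accuracy": 1.0},      # hypothetical metric
}
assert migrate_scoring_result(old_result) == {
    "scores": [{"answer_correctness": 1.0}],
    "metrics": {"accuracy": 1.0},
}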
Agent: type: object properties: @@ -5392,179 +4836,6 @@ components: - llm - embedding title: ModelType - AgentTurnInputType: - type: object - properties: - type: - type: string - const: agent_turn_input - default: agent_turn_input - additionalProperties: false - required: - - type - title: AgentTurnInputType - ArrayType: - type: object - properties: - type: - type: string - const: array - default: array - additionalProperties: false - required: - - type - title: ArrayType - BooleanType: - type: object - properties: - type: - type: string - const: boolean - default: boolean - additionalProperties: false - required: - - type - title: BooleanType - ChatCompletionInputType: - type: object - properties: - type: - type: string - const: chat_completion_input - default: chat_completion_input - additionalProperties: false - required: - - type - title: ChatCompletionInputType - CompletionInputType: - type: object - properties: - type: - type: string - const: completion_input - default: completion_input - additionalProperties: false - required: - - type - title: CompletionInputType - JsonType: - type: object - properties: - type: - type: string - const: json - default: json - additionalProperties: false - required: - - type - title: JsonType - NumberType: - type: object - properties: - type: - type: string - const: number - default: number - additionalProperties: false - required: - - type - title: NumberType - ObjectType: - type: object - properties: - type: - type: string - const: object - default: object - additionalProperties: false - required: - - type - title: ObjectType - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - ScoringFn: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - const: scoring_function - default: scoring_function - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - return_type: - $ref: '#/components/schemas/ParamType' - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - identifier - - provider_resource_id - - provider_id - - type - - metadata - - return_type - title: ScoringFn - StringType: - type: object - properties: - type: - type: string - const: string - default: string - additionalProperties: false - required: - - type - title: StringType - UnionType: - type: object - properties: - type: - type: string - const: 
union - default: union - additionalProperties: false - required: - - type - title: UnionType Shield: type: object properties: @@ -5947,6 +5218,21 @@ components: required: - task title: GradeRequest + AgentCandidate: + type: object + properties: + type: + type: string + const: agent + default: agent + agent_config: + $ref: '#/components/schemas/AgentConfig' + additionalProperties: false + required: + - type + - agent_config + title: AgentCandidate + description: An agent candidate for evaluation. EvaluationCandidate: oneOf: - $ref: '#/components/schemas/ModelCandidate' @@ -6000,6 +5286,30 @@ components: - task - candidate title: EvaluationJob + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model_id: + type: string + sampling_params: + $ref: '#/components/schemas/SamplingParams' + description: The sampling parameters for the model. + system_message: + $ref: '#/components/schemas/SystemMessage' + description: >- + (Optional) The system message providing instructions or context to the + model. + additionalProperties: false + required: + - type + - model_id + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. GradeSyncRequest: type: object properties: @@ -6044,6 +5354,41 @@ components: - scores title: EvaluationResponse description: A response to an inline evaluation. + ScoringResult: + type: object + properties: + scores: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The scoring result for each row. Each row is a map of grader column name + to value. + metrics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Map of metric name to aggregated value. + additionalProperties: false + required: + - scores + - metrics + title: ScoringResult + description: A scoring result for a single row. HealthInfo: type: object properties: @@ -6230,15 +5575,6 @@ components: - data title: IterrowsResponse description: A paginated list of rows from a dataset. 
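(The candidate schemas re-added above rename two fields relative to the removed Eval versions: ModelCandidate's model becomes model_id, and AgentCandidate's config becomes agent_config. A short illustrative sketch of payloads satisfying the new schemas follows; the identifier value is a placeholder, and empty SamplingParams/AgentConfig objects are assumed acceptable for brevity.)

# Payloads for the two EvaluationCandidate variants defined in this diff.
# The "type" property is the discriminator that selects the variant.
model_candidate = {
    "type": "model",
    "model_id": "my-model",  # renamed from "model" in the removed schema
    "sampling_params": {},   # SamplingParams object, left empty here
    # "system_message" is optional per the schema and omitted here
}

agent_candidate = {
    "type": "agent",
    "agent_config": {},      # renamed from "config"; an AgentConfig object
}

for candidate in (model_candidate, agent_candidate):
    assert candidate["type"] in ("model", "agent")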
- JobStatus: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - title: JobStatus ListAgentSessionsResponse: type: object properties: @@ -6431,17 +5767,6 @@ components: required: - data title: ListRoutesResponse - ListScoringFunctionsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ScoringFn' - additionalProperties: false - required: - - data - title: ListScoringFunctionsResponse ListShieldsResponse: type: object properties: @@ -7217,27 +6542,6 @@ components: required: - model_id title: RegisterModelRequest - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: - type: string - return_type: - $ref: '#/components/schemas/ParamType' - provider_scoring_fn_id: - type: string - provider_id: - type: string - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest RegisterShieldRequest: type: object properties: @@ -7338,25 +6642,6 @@ components: - task - candidate title: RunRequest - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest - Job: - type: object - properties: - job_id: - type: string - additionalProperties: false - required: - - job_id - title: Job RunShieldRequest: type: object properties: @@ -7429,81 +6714,6 @@ components: - attributes_to_save - dataset_id title: SaveSpansToDatasetRequest - ScoreRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. - scoring_functions: - type: object - additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. - additionalProperties: false - required: - - input_rows - - scoring_functions - title: ScoreRequest - ScoreResponse: - type: object - properties: - results: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false - required: - - results - title: ScoreResponse - description: The response from scoring. - ScoreBatchRequest: - type: object - properties: - dataset_id: - type: string - scoring_functions: - type: object - additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - save_results_dataset: - type: boolean - additionalProperties: false - required: - - dataset_id - - scoring_functions - - save_results_dataset - title: ScoreBatchRequest - ScoreBatchResponse: - type: object - properties: - dataset_id: - type: string - results: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - additionalProperties: false - required: - - results - title: ScoreBatchResponse LoraFinetuningConfig: type: object properties: @@ -7734,9 +6944,6 @@ tags: - name: Benchmarks - name: DatasetIO - name: Datasets - - name: Eval - x-displayName: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. 
- name: Evaluation - name: Files - name: Graders @@ -7759,8 +6966,6 @@ tags: x-displayName: >- Providers API for inspecting, listing, and modifying providers and their configurations. - name: Safety - - name: Scoring - - name: ScoringFunctions - name: Shields - name: SyntheticDataGeneration (Coming Soon) - name: Telemetry @@ -7776,7 +6981,6 @@ x-tagGroups: - Benchmarks - DatasetIO - Datasets - - Eval - Evaluation - Files - Graders @@ -7786,8 +6990,6 @@ x-tagGroups: - PostTraining (Coming Soon) - Providers - Safety - - Scoring - - ScoringFunctions - Shields - SyntheticDataGeneration (Coming Soon) - Telemetry diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index d5854b53e..fea22a414 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -187,9 +187,7 @@ a default SQLite store will be used.""", benchmarks: List[BenchmarkInput] = Field(default_factory=list) tool_groups: List[ToolGroupInput] = Field(default_factory=list) - logging: Optional[LoggingConfig] = Field( - default=None, description="Configuration for Llama Stack Logging" - ) + logging: Optional[LoggingConfig] = Field(default=None, description="Configuration for Llama Stack Logging") server: ServerConfig = Field( default_factory=ServerConfig, @@ -200,9 +198,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION - distribution_spec: DistributionSpec = Field( - description="The distribution spec to build including API providers. " - ) + distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( default="conda", description="Type of package to build (conda | container | venv)", diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 2aa45322e..7e7237403 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -47,14 +47,9 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: def providable_apis() -> List[Api]: - routing_table_apis = { - x.routing_table_api for x in builtin_automatically_routed_apis() - } + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} return [ - api - for api in Api - if api not in routing_table_apis - and api not in [Api.inspect, Api.providers, Api.benchmarks] + api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers, Api.benchmarks] ] diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index c08ee9881..3a6140478 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -103,9 +103,7 @@ async def resolve_impls( 2. Sorting them in dependency order. 3. Instantiating them with required dependencies. 
""" - routing_table_apis = { - x.routing_table_api for x in builtin_automatically_routed_apis() - } + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} router_apis = {x.router_api for x in builtin_automatically_routed_apis()} providers_with_specs = validate_and_prepare_providers( @@ -113,9 +111,7 @@ async def resolve_impls( ) apis_to_serve = run_config.apis or set( - list(providers_with_specs.keys()) - + [x.value for x in routing_table_apis] - + [x.value for x in router_apis] + list(providers_with_specs.keys()) + [x.value for x in routing_table_apis] + [x.value for x in router_apis] ) providers_with_specs.update(specs_for_autorouted_apis(apis_to_serve)) @@ -180,23 +176,17 @@ def validate_and_prepare_providers( for api_str, providers in run_config.providers.items(): api = Api(api_str) if api in routing_table_apis: - raise ValueError( - f"Provider for `{api_str}` is automatically provided and cannot be overridden" - ) + raise ValueError(f"Provider for `{api_str}` is automatically provided and cannot be overridden") specs = {} for provider in providers: if not provider.provider_id or provider.provider_id == "__disabled__": - logger.warning( - f"Provider `{provider.provider_type}` for API `{api}` is disabled" - ) + logger.warning(f"Provider `{provider.provider_type}` for API `{api}` is disabled") continue validate_provider(provider, api, provider_registry) p = provider_registry[api][provider.provider_type] - p.deps__ = [a.value for a in p.api_dependencies] + [ - a.value for a in p.optional_api_dependencies - ] + p.deps__ = [a.value for a in p.api_dependencies] + [a.value for a in p.optional_api_dependencies] spec = ProviderWithSpec(spec=p, **provider.model_dump()) specs[provider.provider_id] = spec @@ -206,14 +196,10 @@ def validate_and_prepare_providers( return providers_with_specs -def validate_provider( - provider: Provider, api: Api, provider_registry: ProviderRegistry -): +def validate_provider(provider: Provider, api: Api, provider_registry: ProviderRegistry): """Validates if the provider is allowed and handles deprecations.""" if provider.provider_type not in provider_registry[api]: - raise ValueError( - f"Provider `{provider.provider_type}` is not available for API `{api}`" - ) + raise ValueError(f"Provider `{provider.provider_type}` is not available for API `{api}`") p = provider_registry[api][provider.provider_type] if p.deprecation_error: @@ -288,9 +274,7 @@ async def instantiate_providers( ) -> Dict: """Instantiates providers asynchronously while managing dependencies.""" impls: Dict[Api, Any] = {} - inner_impls_by_provider_id: Dict[str, Dict[str, Any]] = { - f"inner-{x.value}": {} for x in router_apis - } + inner_impls_by_provider_id: Dict[str, Dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis} for api_str, provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies} for a in provider.spec.optional_api_dependencies: @@ -299,9 +283,7 @@ async def instantiate_providers( inner_impls = {} if isinstance(provider.spec, RoutingTableProviderSpec): - inner_impls = inner_impls_by_provider_id[ - f"inner-{provider.spec.router_api.value}" - ] + inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"] impl = await instantiate_provider(provider, deps, inner_impls, dist_registry) @@ -359,9 +341,7 @@ async def instantiate_provider( provider_spec = provider.spec if not hasattr(provider_spec, "module"): - raise AttributeError( - f"ProviderSpec of type {type(provider_spec)} does not have a 
'module' attribute" - ) + raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute") module = importlib.import_module(provider_spec.module) args = [] @@ -398,10 +378,7 @@ async def instantiate_provider( # TODO: check compliance for special tool groups # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol check_protocol_compliance(impl, protocols[provider_spec.api]) - if ( - not isinstance(provider_spec, AutoRoutedProviderSpec) - and provider_spec.api in additional_protocols - ): + if not isinstance(provider_spec, AutoRoutedProviderSpec) and provider_spec.api in additional_protocols: additional_api, _, _ = additional_protocols[provider_spec.api] check_protocol_compliance(impl, additional_api) @@ -429,19 +406,12 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None: obj_params = set(obj_sig.parameters) obj_params.discard("self") if not (proto_params <= obj_params): - logger.error( - f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}" - ) + logger.error(f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}") missing_methods.append((name, "signature_mismatch")) else: # Check if the method is actually implemented in the class - method_owner = next( - (cls for cls in mro if name in cls.__dict__), None - ) - if ( - method_owner is None - or method_owner.__name__ == protocol.__name__ - ): + method_owner = next((cls for cls in mro if name in cls.__dict__), None) + if method_owner is None or method_owner.__name__ == protocol.__name__: missing_methods.append((name, "not_actually_implemented")) if missing_methods: diff --git a/llama_stack/distribution/routers/__init__.py b/llama_stack/distribution/routers/__init__.py index 803c94a92..6e2287b87 100644 --- a/llama_stack/distribution/routers/__init__.py +++ b/llama_stack/distribution/routers/__init__.py @@ -44,9 +44,7 @@ async def get_routing_table_impl( return impl -async def get_auto_router_impl( - api: Api, routing_table: RoutingTable, deps: Dict[str, Any] -) -> Any: +async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any: from .routers import ( DatasetIORouter, EvalRouter, diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 369789a16..6c77d09e8 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -8,19 +8,12 @@ import time from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union from llama_stack.apis.common.content_types import ( + URL, InterleavedContent, InterleavedContentItem, - URL, ) from llama_stack.apis.datasetio import DatasetIO, IterrowsResponse from llama_stack.apis.datasets import DatasetPurpose, DataSource -from llama_stack.apis.eval import ( - BenchmarkConfig, - Eval, - EvaluateResponse, - Job, - JobStatus, -) from llama_stack.apis.inference import ( ChatCompletionResponse, ChatCompletionResponseEventType, @@ -42,12 +35,6 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringFnParams, -) from llama_stack.apis.shields import Shield from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.apis.tools import ( @@ -94,9 +81,7 @@ class 
@@ -94,9 +81,7 @@ class VectorIORouter(VectorIO):
         provider_id: Optional[str] = None,
         provider_vector_db_id: Optional[str] = None,
     ) -> None:
-        logger.debug(
-            f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}"
-        )
+        logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
         await self.routing_table.register_vector_db(
             vector_db_id,
             embedding_model,
@@ -114,9 +99,7 @@ class VectorIORouter(VectorIO):
         logger.debug(
             f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
         )
-        return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(
-            vector_db_id, chunks, ttl_seconds
-        )
+        return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(vector_db_id, chunks, ttl_seconds)
 
     async def query_chunks(
         self,
@@ -125,9 +108,7 @@ class VectorIORouter(VectorIO):
         params: Optional[Dict[str, Any]] = None,
     ) -> QueryChunksResponse:
         logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
-        return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(
-            vector_db_id, query, params
-        )
+        return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(vector_db_id, query, params)
 
 
 class InferenceRouter(Inference):
@@ -164,9 +145,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}",
         )
-        await self.routing_table.register_model(
-            model_id, provider_model_id, provider_id, metadata, model_type
-        )
+        await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
 
     def _construct_metrics(
         self,
@@ -220,16 +199,11 @@ class InferenceRouter(Inference):
         total_tokens: int,
         model: Model,
     ) -> List[MetricInResponse]:
-        metrics = self._construct_metrics(
-            prompt_tokens, completion_tokens, total_tokens, model
-        )
+        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
         if self.telemetry:
             for metric in metrics:
                 await self.telemetry.log_event(metric)
-        return [
-            MetricInResponse(metric=metric.metric, value=metric.value)
-            for metric in metrics
-        ]
+        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
 
     async def _count_tokens(
         self,
@@ -254,9 +228,7 @@ class InferenceRouter(Inference):
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
         tool_config: Optional[ToolConfig] = None,
-    ) -> Union[
-        ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
-    ]:
+    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
         logger.debug(
             f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
         )
@@ -266,19 +238,12 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.embedding:
-            raise ValueError(
-                f"Model '{model_id}' is an embedding model and does not support chat completions"
-            )
+            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
         if tool_config:
             if tool_choice and tool_choice != tool_config.tool_choice:
                 raise ValueError("tool_choice and tool_config.tool_choice must match")
-            if (
-                tool_prompt_format
-                and tool_prompt_format != tool_config.tool_prompt_format
-            ):
-                raise ValueError(
-                    "tool_prompt_format and tool_config.tool_prompt_format must match"
-                )
+            if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format:
+                raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match")
         else:
             params = {}
             if tool_choice:
@@ -296,14 +261,9 @@ class InferenceRouter(Inference):
                 pass
             else:
                 # verify tool_choice is one of the tools
-                tool_names = [
-                    t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value
-                    for t in tools
-                ]
+                tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools]
                 if tool_config.tool_choice not in tool_names:
-                    raise ValueError(
-                        f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}"
-                    )
+                    raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}")
 
         params = dict(
             model_id=model_id,
@@ -318,25 +278,17 @@ class InferenceRouter(Inference):
             tool_config=tool_config,
         )
         provider = self.routing_table.get_provider_impl(model_id)
-        prompt_tokens = await self._count_tokens(
-            messages, tool_config.tool_prompt_format
-        )
+        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)
 
         if stream:
 
             async def stream_generator():
                 completion_text = ""
                 async for chunk in await provider.chat_completion(**params):
-                    if (
-                        chunk.event.event_type
-                        == ChatCompletionResponseEventType.progress
-                    ):
+                    if chunk.event.event_type == ChatCompletionResponseEventType.progress:
                         if chunk.event.delta.type == "text":
                             completion_text += chunk.event.delta.text
-                    if (
-                        chunk.event.event_type
-                        == ChatCompletionResponseEventType.complete
-                    ):
+                    if chunk.event.event_type == ChatCompletionResponseEventType.complete:
                         completion_tokens = await self._count_tokens(
                             [
                                 CompletionMessage(
@@ -353,11 +305,7 @@ class InferenceRouter(Inference):
                             total_tokens,
                             model,
                         )
-                        chunk.metrics = (
-                            metrics
-                            if chunk.metrics is None
-                            else chunk.metrics + metrics
-                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
                     yield chunk
 
             return stream_generator()
@@ -374,9 +322,7 @@ class InferenceRouter(Inference):
                 total_tokens,
                 model,
             )
-            response.metrics = (
-                metrics if response.metrics is None else response.metrics + metrics
-            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
             return response
 
     async def completion(
@@ -397,9 +343,7 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.embedding:
-            raise ValueError(
-                f"Model '{model_id}' is an embedding model and does not support chat completions"
-            )
+            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
         provider = self.routing_table.get_provider_impl(model_id)
         params = dict(
             model_id=model_id,
@@ -419,11 +363,7 @@ class InferenceRouter(Inference):
                 async for chunk in await provider.completion(**params):
                     if hasattr(chunk, "delta"):
                         completion_text += chunk.delta
-                    if (
-                        hasattr(chunk, "stop_reason")
-                        and chunk.stop_reason
-                        and self.telemetry
-                    ):
+                    if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry:
                         completion_tokens = await self._count_tokens(completion_text)
                         total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
                         metrics = await self._compute_and_log_token_usage(
@@ -432,11 +372,7 @@ class InferenceRouter(Inference):
                             total_tokens,
                             model,
                         )
-                        chunk.metrics = (
-                            metrics
-                            if chunk.metrics is None
-                            else chunk.metrics + metrics
-                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
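Review note: after this rewrite, chat_completion and completion share the same streaming pattern, wrap the provider stream, accumulate delta text, and attach usage metrics to the terminal chunk. A simplified, runnable illustration of the idea; Chunk and the metric payload are stand-ins, not the llama-stack types.

import asyncio
from dataclasses import dataclass

@dataclass
class Chunk:
    delta: str
    stop_reason: str | None = None
    metrics: list[str] | None = None

async def provider_stream():
    for piece in ("hel", "lo"):
        yield Chunk(delta=piece)
    yield Chunk(delta="", stop_reason="end_of_turn")

async def stream_with_metrics():
    completion_text = ""
    async for chunk in provider_stream():
        completion_text += chunk.delta
        if chunk.stop_reason:
            # Same merge idiom as the diff: create or extend the metrics list.
            metrics = [f"completion_chars={len(completion_text)}"]
            chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
        yield chunk

async def main():
    async for chunk in stream_with_metrics():
        print(chunk)

asyncio.run(main())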
                     yield chunk
 
             return stream_generator()
@@ -450,9 +386,7 @@ class InferenceRouter(Inference):
                 total_tokens,
                 model,
             )
-            response.metrics = (
-                metrics if response.metrics is None else response.metrics + metrics
-            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
             return response
 
     async def embeddings(
@@ -468,9 +402,7 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.llm:
-            raise ValueError(
-                f"Model '{model_id}' is an LLM model and does not support embeddings"
-            )
+            raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings")
         return await self.routing_table.get_provider_impl(model_id).embeddings(
             model_id=model_id,
             contents=contents,
@@ -504,9 +436,7 @@ class SafetyRouter(Safety):
         params: Optional[Dict[str, Any]] = None,
     ) -> Shield:
         logger.debug(f"SafetyRouter.register_shield: {shield_id}")
-        return await self.routing_table.register_shield(
-            shield_id, provider_shield_id, provider_id, params
-        )
+        return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params)
 
     async def run_shield(
         self,
@@ -607,9 +537,9 @@ class ToolRuntimeRouter(ToolRuntime):
             logger.debug(
                 f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
             )
-            return await self.routing_table.get_provider_impl(
-                "insert_into_memory"
-            ).insert(documents, vector_db_id, chunk_size_in_tokens)
+            return await self.routing_table.get_provider_impl("insert_into_memory").insert(
+                documents, vector_db_id, chunk_size_in_tokens
+            )
 
     def __init__(
         self,
@@ -642,6 +572,4 @@ class ToolRuntimeRouter(ToolRuntime):
         self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None
     ) -> List[ToolDef]:
         logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
-        return await self.routing_table.get_provider_impl(tool_group_id).list_tools(
-            tool_group_id, mcp_endpoint
-        )
+        return await self.routing_table.get_provider_impl(tool_group_id).list_tools(tool_group_id, mcp_endpoint)
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index 9aaf83483..69834868e 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -12,7 +12,6 @@ from pydantic import TypeAdapter
 
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.type_system import ParamType
 from llama_stack.apis.datasets import (
     Dataset,
     DatasetPurpose,
@@ -95,9 +94,7 @@ class CommonRoutingTableImpl(RoutingTable):
         self.dist_registry = dist_registry
 
     async def initialize(self) -> None:
-        async def add_objects(
-            objs: List[RoutableObjectWithProvider], provider_id: str, cls
-        ) -> None:
+        async def add_objects(objs: List[RoutableObjectWithProvider], provider_id: str, cls) -> None:
             for obj in objs:
                 if cls is None:
                     obj.provider_id = provider_id
@@ -126,9 +123,7 @@ class CommonRoutingTableImpl(RoutingTable):
         for p in self.impls_by_provider_id.values():
             await p.shutdown()
 
-    def get_provider_impl(
-        self, routing_key: str, provider_id: Optional[str] = None
-    ) -> Any:
+    def get_provider_impl(self, routing_key: str, provider_id: Optional[str] = None) -> Any:
         def apiname_object():
             if isinstance(self, ModelsRoutingTable):
                 return ("Inference", "model")
@@ -164,9 +159,7 @@ class CommonRoutingTableImpl(RoutingTable):
 
         raise ValueError(f"Provider not found for `{routing_key}`")
 
-    async def get_object_by_identifier(
-        self, type: str, identifier: str
-    ) -> Optional[RoutableObjectWithProvider]:
+    async def get_object_by_identifier(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]:
         # Get from disk registry
         obj = await self.dist_registry.get(type, identifier)
         if not obj:
@@ -176,13 +169,9 @@ class CommonRoutingTableImpl(RoutingTable):
 
     async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
         await self.dist_registry.delete(obj.type, obj.identifier)
-        await unregister_object_from_provider(
-            obj, self.impls_by_provider_id[obj.provider_id]
-        )
+        await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
 
-    async def register_object(
-        self, obj: RoutableObjectWithProvider
-    ) -> RoutableObjectWithProvider:
+    async def register_object(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider:
         # if provider_id is not specified, pick an arbitrary one from existing entries
         if not obj.provider_id and len(self.impls_by_provider_id) > 0:
             obj.provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -240,9 +229,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
         if model_type is None:
             model_type = ModelType.llm
         if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
-            raise ValueError(
-                "Embedding model must have an embedding dimension in its metadata"
-            )
+            raise ValueError("Embedding model must have an embedding dimension in its metadata")
         model = Model(
             identifier=model_id,
             provider_resource_id=provider_model_id,
@@ -262,9 +249,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
 
 class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
     async def list_shields(self) -> ListShieldsResponse:
-        return ListShieldsResponse(
-            data=await self.get_all_with_type(ResourceType.shield.value)
-        )
+        return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
 
     async def get_shield(self, identifier: str) -> Shield:
         shield = await self.get_object_by_identifier("shield", identifier)
@@ -329,18 +314,14 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
                     f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
                 )
             else:
-                raise ValueError(
-                    "No provider available. Please configure a vector_io provider."
-                )
+                raise ValueError("No provider available. Please configure a vector_io provider.")
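Review note: register_object and register_vector_db now share the same condensed fallback, if no provider_id is given, take the first configured provider and warn when the choice is ambiguous. A stripped-down version of that selection logic; pick_provider is an invented name, and the messages only echo the ones in the diff.

from typing import Any

def pick_provider(impls_by_provider_id: dict[str, Any], provider_id: str | None = None) -> str:
    if provider_id:
        if provider_id not in impls_by_provider_id:
            raise ValueError(f"Provider `{provider_id}` not found")
        return provider_id
    if not impls_by_provider_id:
        raise ValueError("No provider available. Please configure a provider.")
    if len(impls_by_provider_id) > 1:
        print(f"Multiple providers available. Arbitrarily selected the first: {next(iter(impls_by_provider_id))}")
    return next(iter(impls_by_provider_id))

# dicts preserve insertion order, so the first configured provider wins:
assert pick_provider({"faiss": object(), "pgvector": object()}) == "faiss"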
 
         model = await self.get_object_by_identifier("model", embedding_model)
         if model is None:
             raise ValueError(f"Model {embedding_model} not found")
         if model.model_type != ModelType.embedding:
             raise ValueError(f"Model {embedding_model} is not an embedding model")
         if "embedding_dimension" not in model.metadata:
-            raise ValueError(
-                f"Model {embedding_model} does not have an embedding dimension"
-            )
+            raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
         vector_db_data = {
             "identifier": vector_db_id,
             "type": ResourceType.vector_db.value,
@@ -362,9 +343,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
 
 class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
     async def list_datasets(self) -> ListDatasetsResponse:
-        return ListDatasetsResponse(
-            data=await self.get_all_with_type(ResourceType.dataset.value)
-        )
+        return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
 
     async def get_dataset(self, dataset_id: str) -> Dataset:
         dataset = await self.get_object_by_identifier("dataset", dataset_id)
@@ -447,9 +426,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
         # TODO (xiyan): we will need a way to infer provider_id for evaluation
         # keep it as meta-reference for now
         if len(self.impls_by_provider_id) == 0:
-            raise ValueError(
-                "No evaluation providers available. Please configure an evaluation provider."
-            )
+            raise ValueError("No evaluation providers available. Please configure an evaluation provider.")
         provider_id = list(self.impls_by_provider_id.keys())[0]
 
         benchmark = Benchmark(
@@ -491,12 +468,8 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         args: Optional[Dict[str, Any]] = None,
     ) -> None:
         tools = []
-        tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(
-            toolgroup_id, mcp_endpoint
-        )
-        tool_host = (
-            ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution
-        )
+        tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(toolgroup_id, mcp_endpoint)
+        tool_host = ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution
 
         for tool_def in tool_defs:
             tools.append(
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 9ec52bce0..90f55fc87 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -105,9 +105,7 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(
-            f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}"
-        )
+        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
 
 
 def redact_sensitive_fields(data: Dict[str, Any]) -> Dict[str, Any]:
@@ -198,9 +196,7 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]:
         if not key:
             raise ValueError(f"Empty key in environment variable pair: {env_pair}")
         if not all(c.isalnum() or c == "_" for c in key):
-            raise ValueError(
-                f"Key must contain only alphanumeric characters and underscores: {key}"
-            )
+            raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
         return key, value
     except ValueError as e:
         raise ValueError(
@@ -213,20 +209,14 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]:
 async def construct_stack(
     run_config: StackRunConfig, provider_registry: Optional[ProviderRegistry] = None
 ) -> Dict[Api, Any]:
-    dist_registry, _ = await create_dist_registry(
-        run_config.metadata_store, run_config.image_name
-    )
-    impls = await resolve_impls(
-        run_config, provider_registry or get_provider_registry(), dist_registry
-    )
+    dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
+    impls = await resolve_impls(run_config, provider_registry or get_provider_registry(), dist_registry)
     await register_resources(run_config, impls)
     return impls
 
 
 def get_stack_run_config_from_template(template: str) -> StackRunConfig:
-    template_path = (
-        importlib.resources.files("llama_stack") / f"templates/{template}/run.yaml"
-    )
+    template_path = importlib.resources.files("llama_stack") / f"templates/{template}/run.yaml"
 
     with importlib.resources.as_file(template_path) as path:
         if not path.exists():
@@ -269,9 +259,7 @@ def run_config_from_adhoc_config_spec(
         # call method "sample_run_config" on the provider spec config class
         provider_config_type = instantiate_class_type(provider_spec.config_class)
-        provider_config = replace_env_vars(
-            provider_config_type.sample_run_config(__distro_dir__=distro_dir)
-        )
+        provider_config = replace_env_vars(provider_config_type.sample_run_config(__distro_dir__=distro_dir))
 
         provider_configs_by_api[api_str] = [
             Provider(
diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py
index 0e2e1d14f..1746a8a4f 100644
--- a/llama_stack/distribution/ui/modules/api.py
+++ b/llama_stack/distribution/ui/modules/api.py
@@ -22,9 +22,7 @@ class LlamaStackApi:
             },
         )
 
-    def run_scoring(
-        self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
-    ):
+    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]):
         """Run scoring on a single row"""
         if not scoring_params:
             scoring_params = {fn_id: None for fn_id in scoring_function_ids}
diff --git a/llama_stack/distribution/ui/page/distribution/resources.py b/llama_stack/distribution/ui/page/distribution/resources.py
index da42c468c..28f35fbd0 100644
--- a/llama_stack/distribution/ui/page/distribution/resources.py
+++ b/llama_stack/distribution/ui/page/distribution/resources.py
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from streamlit_option_menu import option_menu
+
 from llama_stack.distribution.ui.page.distribution.datasets import datasets
 from llama_stack.distribution.ui.page.distribution.eval_tasks import benchmarks
 from llama_stack.distribution.ui.page.distribution.models import models
 from llama_stack.distribution.ui.page.distribution.shields import shields
 from llama_stack.distribution.ui.page.distribution.vector_dbs import vector_dbs
-from streamlit_option_menu import option_menu
 
 
 def resources_page():
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index ac41df000..d9b129a8b 100644
--- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -20,7 +20,5 @@ context_entity_recall_fn_def = ScoringFn(
     provider_id="braintrust",
     provider_resource_id="context-entity-recall",
     return_type=NumberType(),
-    params=BasicScoringFnParams(
-        aggregation_functions=[AggregationFunctionType.average]
-    ),
+    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.average]),
 )
diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py
deleted file mode 100644
index 755d30382..000000000
--- a/llama_stack/providers/registry/eval.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import List
-
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
-
-
-def available_providers() -> List[ProviderSpec]:
-    return [
-        InlineProviderSpec(
-            api=Api.eval,
-            provider_type="inline::meta-reference",
-            pip_packages=["tree_sitter"],
-            module="llama_stack.providers.inline.eval.meta_reference",
-            config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-                Api.scoring,
-                Api.inference,
-                Api.agents,
-            ],
-        ),
-    ]
diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py
deleted file mode 100644
index ca09be984..000000000
--- a/llama_stack/providers/registry/scoring.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import List
-
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
-
-
-def available_providers() -> List[ProviderSpec]:
-    return [
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::basic",
-            pip_packages=[],
-            module="llama_stack.providers.inline.scoring.basic",
-            config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-            ],
-        ),
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::llm-as-judge",
-            pip_packages=[],
-            module="llama_stack.providers.inline.scoring.llm_as_judge",
-            config_class="llama_stack.providers.inline.scoring.llm_as_judge.LlmAsJudgeScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-                Api.inference,
-            ],
-        ),
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::braintrust",
-            pip_packages=["autoevals", "openai"],
-            module="llama_stack.providers.inline.scoring.braintrust",
-            config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-            ],
-            provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator",
-        ),
-    ]
diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/llama_stack/providers/utils/common/data_schema_validator.py
index eb9d9dd60..3f8c4b111 100644
--- a/llama_stack/providers/utils/common/data_schema_validator.py
+++ b/llama_stack/providers/utils/common/data_schema_validator.py
@@ -75,29 +75,31 @@ VALID_SCHEMAS_FOR_EVAL = [
 ]
 
 
-def get_valid_schemas(api_str: str):
-    if api_str == Api.scoring.value:
-        return VALID_SCHEMAS_FOR_SCORING
-    elif api_str == Api.eval.value:
-        return VALID_SCHEMAS_FOR_EVAL
-    else:
-        raise ValueError(f"Invalid API string: {api_str}")
+# TODO(xiyan): add this back
+
+# def get_valid_schemas(api_str: str):
+#     if api_str == Api.scoring.value:
+#         return VALID_SCHEMAS_FOR_SCORING
+#     elif api_str == Api.eval.value:
+#         return VALID_SCHEMAS_FOR_EVAL
+#     else:
+#         raise ValueError(f"Invalid API string: {api_str}")
 
 
-def validate_dataset_schema(
-    dataset_schema: Dict[str, Any],
-    expected_schemas: List[Dict[str, Any]],
-):
-    if dataset_schema not in expected_schemas:
-        raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
+# def validate_dataset_schema(
+#     dataset_schema: Dict[str, Any],
+#     expected_schemas: List[Dict[str, Any]],
+# ):
+#     if dataset_schema not in expected_schemas:
+#         raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
 
 
-def validate_row_schema(
-    input_row: Dict[str, Any],
-    expected_schemas: List[Dict[str, Any]],
-):
-    for schema in expected_schemas:
-        if all(key in input_row for key in schema):
-            return
+# def validate_row_schema(
+#     input_row: Dict[str, Any],
+#     expected_schemas: List[Dict[str, Any]],
+# ):
+#     for schema in expected_schemas:
+#         if all(key in input_row for key in schema):
+#             return
 
-    raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
+#     raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
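Review note: the validators here are commented out rather than deleted (the TODO says they will come back). For context, the row check they performed reduces to: a row passes if it contains every key of at least one expected schema. A runnable restatement follows; the sample schema keys are made up for illustration.

from typing import Any

def validate_row_schema(input_row: dict[str, Any], expected_schemas: list[dict[str, Any]]) -> None:
    # Accept the row as soon as one expected schema is fully covered by its keys.
    for schema in expected_schemas:
        if all(key in input_row for key in schema):
            return
    raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")

validate_row_schema(
    {"input_query": "q", "generated_answer": "a"},
    [{"input_query": {}, "generated_answer": {}}],
)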
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 61999c270..5a30e7189 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -11,8 +11,8 @@ from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOCon
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 7d3fe7ca2..beacfc521 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -16,8 +16,8 @@ from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py
index 85523ef06..efb9647f7 100644
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@@ -22,8 +22,8 @@ from llama_stack.providers.remote.inference.fireworks.config import FireworksImp
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py
index dad8b6a8e..36ab22188 100644
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@@ -45,8 +45,8 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 )
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
@@ -96,10 +96,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
 def get_distribution_template() -> DistributionTemplate:
     inference_providers, available_models = get_inference_providers()
     providers = {
-        "inference": (
-            [p.provider_type for p in inference_providers]
-            + ["inline::sentence-transformers"]
-        ),
+        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -119,9 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
         Provider(
             provider_id="sqlite-vec",
             provider_type="inline::sqlite-vec",
-            config=SQLiteVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}"
-            ),
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
             provider_id="${env.ENABLE_CHROMADB+chromadb}",
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 6ea73d3b8..9b33ebc7b 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -21,8 +21,8 @@ from llama_stack.providers.remote.inference.fireworks.config import FireworksImp
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py
index bde870c55..b51cceb0e 100644
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@@ -15,8 +15,8 @@ from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index f273c1a17..2cf8e98d4 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -17,8 +17,8 @@ from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
@@ -87,9 +87,7 @@ def get_distribution_template() -> DistributionTemplate:
                 ]
             },
             default_models=[inference_model, safety_model],
-            default_shields=[
-                ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")
-            ],
+            default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
             default_tool_groups=default_tool_groups,
         ),
     },
diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py
index 185fd867f..1009efa43 100644
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -9,7 +9,6 @@ from typing import Dict, List, Tuple
 from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
-    BenchmarkInput,
     DatasetInput,
     ModelInput,
     Provider,
@@ -31,14 +30,12 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
 
 
-def get_inference_providers() -> (
-    Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]
-):
+def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
     # in this template, we allow each API key to be optional
     providers = [
         (
@@ -119,9 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
         Provider(
             provider_id="sqlite-vec",
             provider_type="inline::sqlite-vec",
-            config=SQLiteVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}"
-            ),
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
             provider_id="${env.ENABLE_CHROMADB+chromadb}",
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index fbeeaad09..fbb9417b9 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -21,8 +21,8 @@ from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
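Review note: the remaining template hunks are mechanical import reordering (URL ahead of InterleavedContent, RunConfigSettings ahead of get_model_registry). That ordering is consistent with isort-style sorting with order_by_type enabled, constants first, then classes, then functions; the exact tool configuration is an assumption, but the rule can be demonstrated:

def isort_key(name: str) -> tuple[int, str]:
    # Constants (ALL_CAPS), then classes (CamelCase), then functions, similar to isort's order_by_type.
    if name.isupper():
        group = 0
    elif name[:1].isupper():
        group = 1
    else:
        group = 2
    return (group, name.lower())

names = ["InterleavedContentItem", "URL", "InterleavedContent", "get_model_registry", "RunConfigSettings"]
print(sorted(names, key=isort_key))
# ['URL', 'InterleavedContent', 'InterleavedContentItem', 'RunConfigSettings', 'get_model_registry']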