diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index af1f97ca0..e3505752f 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7681,73 +7681,6 @@
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
},
- "ScoringResult": {
- "type": "object",
- "properties": {
- "scores": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- },
- "description": "The scoring result for each row. Each row is a map of grader column name to value."
- },
- "metrics": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- },
- "description": "Map of metric name to aggregated value."
- }
- },
- "additionalProperties": false,
- "required": [
- "scores",
- "metrics"
- ],
- "title": "ScoringResult",
- "description": "A scoring result for a single row."
- },
"HealthInfo": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 5d5b323be..1fee27e59 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5359,41 +5359,6 @@ components:
- grades
title: EvaluationResponse
description: A response to an inline evaluation.
- ScoringResult:
- type: object
- properties:
- scores:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: >-
- The scoring result for each row. Each row is a map of grader column name
- to value.
- metrics:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: Map of metric name to aggregated value.
- additionalProperties: false
- required:
- - scores
- - metrics
- title: ScoringResult
- description: A scoring result for a single row.
HealthInfo:
type: object
properties:
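A schema removal like this is only safe if no $ref in the spec still points at it. A hedged sanity check, assuming PyYAML is installed and the script runs from the repo root (the path comes from this diff):

```python
import yaml  # PyYAML


def collect_refs(node, refs):
    """Recursively gather every $ref target in an OpenAPI document."""
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "$ref" and isinstance(value, str):
                refs.add(value)
            collect_refs(value, refs)
    elif isinstance(node, list):
        for item in node:
            collect_refs(item, refs)


with open("docs/_static/llama-stack-spec.yaml") as f:
    spec = yaml.safe_load(f)

refs = set()
collect_refs(spec, refs)
assert "#/components/schemas/ScoringResult" not in refs, "ScoringResult is still referenced"
```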
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py
index a710c49b1..43c37806e 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@@ -51,14 +51,8 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:


def providable_apis() -> List[Api]:
- routing_table_apis = {
- x.routing_table_api for x in builtin_automatically_routed_apis()
- }
- return [
- api
- for api in Api
- if api not in routing_table_apis and api not in [Api.inspect, Api.providers]
- ]
+ routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
+ return [api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers]]


def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:
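The providable_apis() change above only collapses the comprehensions onto single lines; behavior is unchanged. For readers comparing the two forms, an expanded equivalent (a sketch that assumes the surrounding module's Api enum and builtin_automatically_routed_apis() are in scope; the function name is hypothetical):

```python
def providable_apis_expanded() -> List[Api]:
    # Same semantics as the one-liners above, written out as explicit loops.
    routing_table_apis = set()
    for info in builtin_automatically_routed_apis():
        routing_table_apis.add(info.routing_table_api)

    apis = []
    for api in Api:
        if api in routing_table_apis:
            continue  # exposed via an auto-routed routing table, not directly providable
        if api in (Api.inspect, Api.providers):
            continue  # built-in introspection APIs are never providable
        apis.append(api)
    return apis
```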
diff --git a/tests/integration/eval/test_eval.py b/tests/integration/eval/test_eval.py
index 6ffa22cb0..7a519f208 100644
--- a/tests/integration/eval/test_eval.py
+++ b/tests/integration/eval/test_eval.py
@@ -22,9 +22,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
purpose="eval/messages-answer",
source={
"type": "uri",
- "uri": data_url_from_file(
- Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
- ),
+ "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
},
)
response = llama_stack_client.datasets.list()
@@ -74,9 +72,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
purpose="eval/messages-answer",
source={
"type": "uri",
- "uri": data_url_from_file(
- Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
- ),
+ "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
},
)
benchmark_id = str(uuid.uuid4())
@@ -99,14 +95,10 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
},
)
assert response.job_id == "0"
- job_status = llama_stack_client.eval.jobs.status(
- job_id=response.job_id, benchmark_id=benchmark_id
- )
+ job_status = llama_stack_client.eval.jobs.status(job_id=response.job_id, benchmark_id=benchmark_id)
assert job_status and job_status == "completed"

- eval_response = llama_stack_client.eval.jobs.retrieve(
- job_id=response.job_id, benchmark_id=benchmark_id
- )
+ eval_response = llama_stack_client.eval.jobs.retrieve(job_id=response.job_id, benchmark_id=benchmark_id)
assert eval_response is not None
assert len(eval_response.generations) == 5
assert scoring_fn_id in eval_response.scores
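The reformatted test reads the job status once and asserts it is already "completed", which suggests the test's eval backend runs jobs synchronously. Against an asynchronous backend a caller would poll instead; a hedged sketch using only the client methods that appear in this diff (the "failed" and "cancelled" terminal states are assumptions, as is the wait_for_eval_job name):

```python
import time


def wait_for_eval_job(client, benchmark_id: str, job_id: str, timeout_s: float = 300.0):
    """Poll an eval job until it reaches a terminal state, then fetch its results."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        status = client.eval.jobs.status(job_id=job_id, benchmark_id=benchmark_id)
        if status == "completed":
            return client.eval.jobs.retrieve(job_id=job_id, benchmark_id=benchmark_id)
        if status in ("failed", "cancelled"):  # assumed failure states
            raise RuntimeError(f"eval job {job_id} ended with status {status!r}")
        time.sleep(2.0)
    raise TimeoutError(f"eval job {job_id} did not finish within {timeout_s}s")
```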
diff --git a/tests/integration/scoring/test_scoring.py b/tests/integration/scoring/test_scoring.py
index d43ee15a1..675090f7f 100644
--- a/tests/integration/scoring/test_scoring.py
+++ b/tests/integration/scoring/test_scoring.py
@@ -154,11 +154,7 @@ def test_scoring_score_with_aggregation_functions(
df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
rows = df.to_dict(orient="records")

- scoring_fns_list = [
- x
- for x in llama_stack_client.scoring_functions.list()
- if x.provider_id == provider_id
- ]
+ scoring_fns_list = [x for x in llama_stack_client.scoring_functions.list() if x.provider_id == provider_id]
if len(scoring_fns_list) == 0:
pytest.skip(f"No scoring functions found for provider {provider_id}, skipping")