pre

2025-12-31 03:43:53 +00:00 · 2025-03-19 10:10:02 -07:00 · 2025-03-19 10:10:02 -07:00 · 443b18a992
commit 443b18a992
parent d1b44c1251
5 changed files with 7 additions and 127 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -7681,73 +7681,6 @@
                "title": "EvaluationResponse",
                "description": "A response to an inline evaluation."
            },
            "ScoringResult": {
                "type": "object",
                "properties": {
                    "scores": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "additionalProperties": {
                                "oneOf": [
                                    {
                                        "type": "null"
                                    },
                                    {
                                        "type": "boolean"
                                    },
                                    {
                                        "type": "number"
                                    },
                                    {
                                        "type": "string"
                                    },
                                    {
                                        "type": "array"
                                    },
                                    {
                                        "type": "object"
                                    }
                                ]
                            }
                        },
                        "description": "The scoring result for each row. Each row is a map of grader column name to value."
                    },
                    "metrics": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "null"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "array"
                                },
                                {
                                    "type": "object"
                                }
                            ]
                        },
                        "description": "Map of metric name to aggregated value."
                    }
                },
                "additionalProperties": false,
                "required": [
                    "scores",
                    "metrics"
                ],
                "title": "ScoringResult",
                "description": "A scoring result for a single row."
            },
            "HealthInfo": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -5359,41 +5359,6 @@ components:
        - grades
      title: EvaluationResponse
      description: A response to an inline evaluation.
    ScoringResult:
      type: object
      properties:
        scores:
          type: array
          items:
            type: object
            additionalProperties:
              oneOf:
                - type: 'null'
                - type: boolean
                - type: number
                - type: string
                - type: array
                - type: object
          description: >-
            The scoring result for each row. Each row is a map of grader column name
            to value.
        metrics:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: Map of metric name to aggregated value.
      additionalProperties: false
      required:
        - scores
        - metrics
      title: ScoringResult
      description: A scoring result for a single row.
    HealthInfo:
      type: object
      properties:
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@ -51,14 +51,8 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
 def providable_apis() -> List[Api]:
-    routing_table_apis = {
+    routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
-        x.routing_table_api for x in builtin_automatically_routed_apis()
+    return [api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers]]
    }
    return [
        api
        for api in Api
        if api not in routing_table_apis and api not in [Api.inspect, Api.providers]
    ]
 def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]:
--- a/tests/integration/eval/test_eval.py
+++ b/tests/integration/eval/test_eval.py
@ -22,9 +22,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
        purpose="eval/messages-answer",
        source={
            "type": "uri",
-            "uri": data_url_from_file(
+            "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
                Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
            ),
        },
    )
    response = llama_stack_client.datasets.list()
@ -74,9 +72,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
        purpose="eval/messages-answer",
        source={
            "type": "uri",
-            "uri": data_url_from_file(
+            "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
                Path(__file__).parent.parent / "datasets" / "test_dataset.csv"
            ),
        },
    )
    benchmark_id = str(uuid.uuid4())
@ -99,14 +95,10 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
        },
    )
    assert response.job_id == "0"
-    job_status = llama_stack_client.eval.jobs.status(
+    job_status = llama_stack_client.eval.jobs.status(job_id=response.job_id, benchmark_id=benchmark_id)
        job_id=response.job_id, benchmark_id=benchmark_id
    )
    assert job_status and job_status == "completed"
-    eval_response = llama_stack_client.eval.jobs.retrieve(
+    eval_response = llama_stack_client.eval.jobs.retrieve(job_id=response.job_id, benchmark_id=benchmark_id)
        job_id=response.job_id, benchmark_id=benchmark_id
    )
    assert eval_response is not None
    assert len(eval_response.generations) == 5
    assert scoring_fn_id in eval_response.scores
--- a/tests/integration/scoring/test_scoring.py
+++ b/tests/integration/scoring/test_scoring.py
@ -154,11 +154,7 @@ def test_scoring_score_with_aggregation_functions(
    df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
    rows = df.to_dict(orient="records")
-    scoring_fns_list = [
+    scoring_fns_list = [x for x in llama_stack_client.scoring_functions.list() if x.provider_id == provider_id]
        x
        for x in llama_stack_client.scoring_functions.list()
        if x.provider_id == provider_id
    ]
    if len(scoring_fns_list) == 0:
        pytest.skip(f"No scoring functions found for provider {provider_id}, skipping")