feat(eval api): (2.2/n) delete eval / scoring / scoring_fn apis (#1700)

# What does this PR do? - To make it easier, delete existing `eval/scoring/scoring_function` apis. There will be a bunch of broken impls here. The sequence is: 1. migrate benchmark graders 2. clean up existing scoring functions - Add a skeleton evaluation impl to make tests pass. ## Test Plan tested in following PRs [//]: # (## Documentation)
2025-03-19 11:04:23 -07:00 · 2025-03-19 11:04:23 -07:00 · c1d18283d2
commit c1d18283d2
parent 0048274ec0
113 changed files with 408 additions and 3900 deletions
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@ -7,17 +7,17 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import (
-    ModelInput,
-    Provider,
-    ToolGroupInput,
-)
+from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -27,9 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",