feat(eval api): (2.2/n) delete eval / scoring / scoring_fn apis (#1700)

# What does this PR do?

- To make the migration easier, delete the existing `eval` / `scoring` / `scoring_function` APIs. This will leave a number of broken implementations behind. The sequence is:
  1. Migrate benchmark graders.
  2. Clean up existing scoring functions.
- Add a skeleton evaluation implementation to make tests pass.

## Test Plan

Tested in the following PRs.

[//]: # (## Documentation)
2025-03-19 11:04:23 -07:00 · 2025-03-19 11:04:23 -07:00 · c1d18283d2
commit c1d18283d2
parent 0048274ec0
113 changed files with 408 additions and 3900 deletions
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@ -13,15 +13,9 @@ distribution_spec:
    - inline::llama-guard
    agents:
    - inline::meta-reference
-    eval:
-    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
-    scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
    telemetry:
    - inline::meta-reference
    tool_runtime:
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -3,10 +3,8 @@ image_name: remote-vllm
 apis:
 - agents
 - datasetio
- eval
 - inference
 - safety
- scoring
 - telemetry
 - tool_runtime
 - vector_io
@ -50,14 +48,6 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
@ -73,17 +63,6 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -136,7 +115,6 @@ shields:
 - shield_id: ${env.SAFETY_MODEL}
 vector_dbs: []
 datasets: []
-scoring_fns: []
 benchmarks: []
 tool_groups:
 - toolgroup_id: builtin::websearch
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -3,10 +3,8 @@ image_name: remote-vllm
 apis:
 - agents
 - datasetio
- eval
 - inference
 - safety
- scoring
 - telemetry
 - tool_runtime
 - vector_io
@ -43,14 +41,6 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
@ -66,17 +56,6 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -124,7 +103,6 @@ models:
 shields: []
 vector_dbs: []
 datasets: []
-scoring_fns: []
 benchmarks: []
 tool_groups:
 - toolgroup_id: builtin::websearch
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@ -27,9 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "telemetry": ["inline::meta-reference"],
        "tool_runtime": [
            "remote::brave-search",