From 0248f3590a58e9ee4e7b820a85c4a54a8273d77d Mon Sep 17 00:00:00 2001
From: Botao Chen <markchen1015@meta.com>
Date: Tue, 18 Mar 2025 23:38:45 -0700
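Subject: [PATCH] Add IfEval benchmark to the open-benchmark template

Register IfEvalScoringFn in the basic scoring provider, add the IfEval
dataset and the meta-reference-IfEval benchmark to the open-benchmark
template (open_benchmark.py and run.yaml), and add the emoji, langdetect
and pythainlp packages to distributions/dependencies.json.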

---
 distributions/dependencies.json               | 57 +++++++++++++++++++
 .../providers/inline/scoring/basic/scoring.py |  2 +
 .../open-benchmark/open_benchmark.py          | 13 +++++
 llama_stack/templates/open-benchmark/run.yaml | 12 ++++
 4 files changed, 84 insertions(+)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 33b497a33..da0de2820 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -7,10 +7,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -23,6 +25,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -41,10 +44,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "nltk",
     "numpy",
@@ -56,6 +61,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -75,10 +81,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "fastapi",
     "fire",
     "fireworks-ai",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -91,6 +99,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -112,11 +121,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
     "huggingface_hub",
+    "langdetect",
     "matplotlib",
     "nltk",
     "numpy",
@@ -128,6 +139,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -147,10 +159,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "fastapi",
     "fire",
     "fireworks-ai",
     "httpx",
+    "langdetect",
     "litellm",
     "matplotlib",
     "mcp",
@@ -164,6 +178,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -184,11 +199,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "fireworks-ai",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -201,6 +218,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -219,10 +237,12 @@
     "blobfile",
     "chardet",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "litellm",
     "matplotlib",
     "nltk",
@@ -235,6 +255,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -253,11 +274,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
     "huggingface_hub",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -270,6 +293,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -288,11 +312,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
     "huggingface_hub",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -305,6 +331,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -325,11 +352,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "fairscale",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "lm-format-enforcer",
     "matplotlib",
     "mcp",
@@ -343,6 +372,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -365,12 +395,14 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "fairscale",
     "faiss-cpu",
     "fastapi",
     "fbgemm-gpu",
     "fire",
     "httpx",
+    "langdetect",
     "lm-format-enforcer",
     "matplotlib",
     "mcp",
@@ -384,6 +416,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -403,10 +436,12 @@
     "aiosqlite",
     "blobfile",
     "chardet",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "nltk",
     "numpy",
@@ -418,6 +453,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -436,10 +472,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -453,6 +491,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -470,9 +509,11 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "litellm",
     "matplotlib",
     "mcp",
@@ -486,6 +527,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -505,10 +547,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -521,6 +565,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -540,10 +585,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -556,6 +603,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -605,11 +653,13 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
     "huggingface_hub",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -622,6 +672,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -641,10 +692,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -657,6 +710,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -677,10 +731,12 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -693,6 +749,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/llama_stack/providers/inline/scoring/basic/scoring.py
index a735166e1..9434dd8ce 100644
--- a/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -24,6 +24,7 @@ from llama_stack.providers.utils.common.data_schema_validator import (
 from .config import BasicScoringConfig
 from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
+from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn
 from .scoring_fn.regex_parser_math_response_scoring_fn import (
     RegexParserMathResponseScoringFn,
 )
@@ -36,6 +37,7 @@ FIXED_FNS = [
     RegexParserScoringFn,
     RegexParserMathResponseScoringFn,
     BFCLScoringFn,
+    IfEvalScoringFn,
 ]
 
 
diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py
index b339e8c80..e29b987ee 100644
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -208,6 +208,14 @@ def get_distribution_template() -> DistributionTemplate:
                 uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
             ),
         ),
+        DatasetInput(
+            dataset_id="IfEval",
+            provider_id="huggingface",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/IfEval?split=train",
+            ),
+        ),
     ]
 
     default_benchmarks = [
@@ -236,6 +244,11 @@ def get_distribution_template() -> DistributionTemplate:
             dataset_id="bfcl",
             scoring_functions=["basic::bfcl"],
         ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-IfEval",
+            dataset_id="IfEval",
+            scoring_functions=["basic::IfEval"],
+        ),
     ]
     return DistributionTemplate(
         name=name,
diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml
index 93f437273..a9d082d53 100644
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@@ -193,6 +193,13 @@ datasets:
   metadata: {}
   dataset_id: bfcl
   provider_id: huggingface
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/IfEval?split=train
+  metadata: {}
+  dataset_id: IfEval
+  provider_id: huggingface
 scoring_fns: []
 benchmarks:
 - dataset_id: simpleqa
@@ -220,6 +227,11 @@ benchmarks:
   - basic::bfcl
   metadata: {}
   benchmark_id: meta-reference-bfcl
+- dataset_id: IfEval
+  scoring_functions:
+  - basic::IfEval
+  metadata: {}
+  benchmark_id: meta-reference-IfEval
 tool_groups:
 - toolgroup_id: builtin::websearch
   provider_id: tavily-search