diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 12e44e2c3..54d888441 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6471,7 +6471,8 @@
"additionalProperties": false,
"required": [
"type",
- "model"
+ "model",
+ "sampling_params"
],
"title": "ModelCandidate",
"description": "A model candidate for evaluation."
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 5243579de..cf657bff9 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4553,6 +4553,7 @@ components:
required:
- type
- model
+ - sampling_params
title: ModelCandidate
description: A model candidate for evaluation.
RegexParserScoringFnParams:
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index a3a4c0c3d..0e5959c37 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -28,7 +28,7 @@ class ModelCandidate(BaseModel):
type: Literal["model"] = "model"
model: str
- sampling_params: Optional[SamplingParams] = Field(default_factory=SamplingParams)
+ sampling_params: SamplingParams
system_message: Optional[SystemMessage] = None
diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py
index acd6e6e36..b660fcd68 100644
--- a/llama_stack/providers/remote/eval/nvidia/config.py
+++ b/llama_stack/providers/remote/eval/nvidia/config.py
@@ -14,10 +14,10 @@ class NVIDIAEvalConfig(BaseModel):
Configuration for the NVIDIA NeMo Evaluator microservice endpoint.
Attributes:
- evaluator_service_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000.
+ evaluator_url (str): Base URL for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:7331.
"""
- evaluator_service_url: str = Field(
+ evaluator_url: str = Field(
default_factory=lambda: os.getenv("NVIDIA_EVALUATOR_URL", "http://0.0.0.0:7331"),
description="The url for accessing the evaluator service",
)
@@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel):
@classmethod
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
return {
- "evaluator_service_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
+ "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
}
diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/llama_stack/providers/remote/eval/nvidia/eval.py
index 92a734058..b17bf8013 100644
--- a/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -53,13 +53,13 @@ class NVIDIAEvalImpl(
async def _evaluator_get(self, path):
"""Helper for making GET requests to the evaluator service."""
- response = requests.get(url=f"{self.config.evaluator_service_url}{path}")
+ response = requests.get(url=f"{self.config.evaluator_url}{path}")
response.raise_for_status()
return response.json()
async def _evaluator_post(self, path, data):
"""Helper for making POST requests to the evaluator service."""
- response = requests.post(url=f"{self.config.evaluator_service_url}{path}", json=data)
+ response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
response.raise_for_status()
return response.json()
diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml
index 55d7d87cf..8483fb9bf 100644
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@@ -56,7 +56,7 @@ providers:
- provider_id: nvidia
provider_type: remote::nvidia
config:
- evaluator_service_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
+ evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
post_training:
- provider_id: nvidia
provider_type: remote::nvidia
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 82054001e..f999dd64b 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -51,7 +51,7 @@ providers:
- provider_id: nvidia
provider_type: remote::nvidia
config:
- evaluator_service_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
+ evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
post_training:
- provider_id: nvidia
provider_type: remote::nvidia
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index 68f102f83..584ca2101 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -33,7 +33,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
self.agents_api = MagicMock()
self.config = NVIDIAEvalConfig(
- evaluator_service_url=os.environ["NVIDIA_EVALUATOR_URL"],
+ evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"],
)
self.eval_impl = NVIDIAEvalImpl(
@@ -56,9 +56,6 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
self.mock_evaluator_get = self.evaluator_get_patcher.start()
self.mock_evaluator_post = self.evaluator_post_patcher.start()
- # Set up async test helper
- # self.run_async = self._create_async_helper()
-
def tearDown(self):
"""Clean up after each test."""
self.evaluator_get_patcher.stop()