diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py
index ba2fc7c95..58241eb42 100644
--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -150,7 +150,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
                 messages.append(candidate.system_message)
             messages += input_messages
             response = await self.inference_api.chat_completion(
-                model=candidate.model,
+                model_id=candidate.model,
                 messages=messages,
                 sampling_params=candidate.sampling_params,
             )
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index a950f35f9..4b43de93f 100644
--- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -62,7 +62,7 @@ class LlmAsJudgeScoringFn(BaseScoringFn):
         )

         judge_response = await self.inference_api.chat_completion(
-            model=fn_def.params.judge_model,
+            model_id=fn_def.params.judge_model,
             messages=[
                 {
                     "role": "user",
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 2d03a9ef8..e5eb6e1ea 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -85,7 +85,6 @@ class VLLMInferenceAdapter(Inference, ModelRegistryHelper, ModelsProtocolPrivate
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         model = await self.model_store.get_model(model_id)
-        print(f"model={model}")
         request = ChatCompletionRequest(
             model=model.provider_resource_id,
             messages=messages,
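
For context, a minimal sketch of a call site after this rename. Only the chat_completion method and its model_id, messages, and sampling_params parameters are taken from the hunks above; the function, its arguments, and the inference_api object are hypothetical placeholders, not part of this patch.

# Hypothetical caller sketch; only the model_id keyword argument reflects this patch.
async def grade_with_judge(inference_api, judge_model: str, prompt: str):
    # The model identifier is now passed via model_id (previously the keyword was model).
    judge_response = await inference_api.chat_completion(
        model_id=judge_model,
        messages=[{"role": "user", "content": prompt}],
    )
    return judge_response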