diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index 340215a53..d60efe828 100644
--- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,7 +6,7 @@
 import re
 from typing import Any
 
-from llama_stack.apis.inference import Inference, UserMessage
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import ScoringResultRow
 from llama_stack.apis.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
@@ -55,15 +55,16 @@ class LlmAsJudgeScoringFn(RegisteredBaseScoringFn):
             generated_answer=generated_answer,
         )
 
-        judge_response = await self.inference_api.chat_completion(
-            model_id=fn_def.params.judge_model,
+        judge_response = await self.inference_api.openai_chat_completion(
+            model=fn_def.params.judge_model,
             messages=[
-                UserMessage(
-                    content=judge_input_msg,
-                ),
+                {
+                    "role": "user",
+                    "content": judge_input_msg,
+                }
             ],
         )
-        content = judge_response.completion_message.content
+        content = judge_response.choices[0].message.content
         rating_regexes = fn_def.params.judge_score_regexes
 
         judge_rating = None
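
For context, a minimal runnable sketch of what the migrated call site now consumes: an OpenAI-style response where the judge's text lives at `choices[0].message.content`, followed by the same regex-scan pattern the surrounding code uses. The `fake_judge_response` mock, its `SimpleNamespace` shape, and the `Score:` regex are illustrative stand-ins for this sketch only, not part of the patch:

```python
import re
from types import SimpleNamespace

# Hypothetical stand-in mirroring the OpenAI-compatible response shape the
# patch now expects: response.choices[0].message.content holds the judge text.
fake_judge_response = SimpleNamespace(
    choices=[SimpleNamespace(message=SimpleNamespace(content="Reasoning... Score: 4"))]
)

content = fake_judge_response.choices[0].message.content

# Illustrative regex list, analogous to fn_def.params.judge_score_regexes.
rating_regexes = [r"Score:\s*(\d+)"]

judge_rating = None
for regex in rating_regexes:
    match = re.search(regex, content)
    if match:
        judge_rating = int(match.group(1))
        break

print(judge_rating)  # -> 4
```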