From fb418813fc40661a851e20768a7f7c08c5cbe461 Mon Sep 17 00:00:00 2001
From: yyymeta <123776235+yyymeta@users.noreply.github.com>
Date: Mon, 17 Mar 2025 13:42:08 -0700
Subject: [PATCH] fix: passthrough impl response.content.text (#1665)

# What does this PR do?

The current passthrough implementation returns `completion_message.content` as a `TextItem()` rather than a plain string, which is incompatible with other providers and causes parsing errors downstream. This change replaces the generic pydantic conversion with an explicit extraction of `content.text`.

## Test Plan

Set up a llama server with the passthrough provider, then run:

```
llama-stack-client eval run-benchmark "MMMU_Pro_standard" --model-id meta-llama/Llama-3-8B --output-dir /tmp/ --num-examples 20
```

The benchmark runs without parsing errors.
---
 .../remote/inference/passthrough/passthrough.py  | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 8f3a0d147..96b2d73d8 100644
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -12,6 +12,7 @@ from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseStreamChunk,
+    CompletionMessage,
     EmbeddingsResponse,
     EmbeddingTaskType,
     Inference,
@@ -160,12 +161,14 @@ class PassthroughInferenceAdapter(Inference):
         client = self._get_client()
 
         response = await client.inference.chat_completion(**json_params)
-        response = response.to_dict()
-
-        # temporary hack to remove the metrics from the response
-        response["metrics"] = []
-
-        return convert_to_pydantic(ChatCompletionResponse, response)
+        return ChatCompletionResponse(
+            completion_message=CompletionMessage(
+                content=response.completion_message.content.text,
+                stop_reason=response.completion_message.stop_reason,
+                tool_calls=response.completion_message.tool_calls,
+            ),
+            logprobs=response.logprobs,
+        )
 
     async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator:
         client = self._get_client()
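
For readers outside the codebase, here is a minimal, self-contained sketch of the shape mismatch this patch addresses. The dataclasses below are hypothetical stand-ins, not the real `llama_stack` types: the upstream client hands back `content` as a text-item object, while downstream consumers expect a plain `str`, so the adapter now unwraps `.text` explicitly.

```python
# Hypothetical stand-ins for illustration only; the real llama_stack
# classes differ. This sketches the shape mismatch, not the actual API.
from dataclasses import dataclass, field
from typing import List


@dataclass
class TextItem:
    text: str
    type: str = "text"


@dataclass
class UpstreamCompletionMessage:
    # Upstream: content arrives as an object wrapping the string.
    content: TextItem
    stop_reason: str = "end_of_turn"
    tool_calls: List = field(default_factory=list)


@dataclass
class CompletionMessage:
    # Downstream: other providers return content as a plain string.
    content: str
    stop_reason: str
    tool_calls: List


upstream = UpstreamCompletionMessage(content=TextItem(text="Hello!"))

# Before the patch, a generic dict/pydantic round trip preserved the nested
# {"type": "text", "text": "Hello!"} object, which downstream parsers reject.
# The fix unwraps the wrapper explicitly, as in the hunk above:
fixed = CompletionMessage(
    content=upstream.content.text,  # "Hello!" as a plain str
    stop_reason=upstream.stop_reason,
    tool_calls=upstream.tool_calls,
)
assert isinstance(fixed.content, str)
```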