Fix passthrough implementation: build the response from response.completion_message.content.text

This commit is contained in:
Yang Yang 2025-03-16 23:17:11 -07:00
parent a626b7bce3
commit af20652677

View file

@@ -12,6 +12,7 @@ from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.inference import ( from llama_stack.apis.inference import (
ChatCompletionResponse, ChatCompletionResponse,
ChatCompletionResponseStreamChunk, ChatCompletionResponseStreamChunk,
CompletionMessage,
EmbeddingsResponse, EmbeddingsResponse,
EmbeddingTaskType, EmbeddingTaskType,
Inference, Inference,
@@ -160,12 +161,14 @@ class PassthroughInferenceAdapter(Inference):
client = self._get_client() client = self._get_client()
response = await client.inference.chat_completion(**json_params) response = await client.inference.chat_completion(**json_params)
response = response.to_dict() return ChatCompletionResponse(
completion_message=CompletionMessage(
# temporary hack to remove the metrics from the response content=response.completion_message.content.text,
response["metrics"] = [] stop_reason=response.completion_message.stop_reason,
tool_calls=response.completion_message.tool_calls,
return convert_to_pydantic(ChatCompletionResponse, response) ),
logprobs=response.logprobs,
)
async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator: async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator:
client = self._get_client() client = self._get_client()