This commit is contained in:
Botao Chen 2025-03-11 20:46:19 -07:00
parent 3ca640be7f
commit 8667b137f6

View file

@@ -159,7 +159,10 @@ class PassthroughInferenceAdapter(Inference):
async def _nonstream_chat_completion(self, json_params: Dict[str, Any]) -> ChatCompletionResponse: async def _nonstream_chat_completion(self, json_params: Dict[str, Any]) -> ChatCompletionResponse:
client = self._get_client() client = self._get_client()
response = await client.inference.chat_completion(**json_params) response = await client.inference.chat_completion(**json_params)
response = response.to_dict() response = response.to_dict()
# temporary hack to remove the metrics from the response
response["metrics"] = [] response["metrics"] = []
return convert_to_pydantic(ChatCompletionResponse, response) return convert_to_pydantic(ChatCompletionResponse, response)
@@ -170,6 +173,8 @@ class PassthroughInferenceAdapter(Inference):
async for chunk in stream_response: async for chunk in stream_response:
chunk = chunk.to_dict() chunk = chunk.to_dict()
# temporary hack to remove the metrics from the response
chunk["metrics"] = [] chunk["metrics"] = []
chunk = convert_to_pydantic(ChatCompletionResponseStreamChunk, chunk) chunk = convert_to_pydantic(ChatCompletionResponseStreamChunk, chunk)
yield chunk yield chunk