mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-11 12:38:02 +00:00
refine
This commit is contained in:
parent
3ca640be7f
commit
8667b137f6
1 changed files with 5 additions and 0 deletions
|
@ -159,7 +159,10 @@ class PassthroughInferenceAdapter(Inference):
|
|||
async def _nonstream_chat_completion(self, json_params: Dict[str, Any]) -> ChatCompletionResponse:
|
||||
client = self._get_client()
|
||||
response = await client.inference.chat_completion(**json_params)
|
||||
|
||||
response = response.to_dict()
|
||||
|
||||
# temporary hack to remove the metrics from the response
|
||||
response["metrics"] = []
|
||||
|
||||
return convert_to_pydantic(ChatCompletionResponse, response)
|
||||
|
@ -170,6 +173,8 @@ class PassthroughInferenceAdapter(Inference):
|
|||
|
||||
async for chunk in stream_response:
|
||||
chunk = chunk.to_dict()
|
||||
|
||||
# temporary hack to remove the metrics from the response
|
||||
chunk["metrics"] = []
|
||||
chunk = convert_to_pydantic(ChatCompletionResponseStreamChunk, chunk)
|
||||
yield chunk
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue