Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-10 04:08:31 +00:00
fix passthrough impl, response.content.text

parent a626b7bce3
commit af20652677

1 changed file with 9 additions and 6 deletions
@@ -12,6 +12,7 @@ from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseStreamChunk,
+    CompletionMessage,
     EmbeddingsResponse,
     EmbeddingTaskType,
     Inference,
@@ -160,12 +161,14 @@ class PassthroughInferenceAdapter(Inference):
         client = self._get_client()
         response = await client.inference.chat_completion(**json_params)

-        response = response.to_dict()
-
-        # temporary hack to remove the metrics from the response
-        response["metrics"] = []
-
-        return convert_to_pydantic(ChatCompletionResponse, response)
+        return ChatCompletionResponse(
+            completion_message=CompletionMessage(
+                content=response.completion_message.content.text,
+                stop_reason=response.completion_message.stop_reason,
+                tool_calls=response.completion_message.tool_calls,
+            ),
+            logprobs=response.logprobs,
+        )

     async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator:
         client = self._get_client()
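In short: the old path converted the client response to a dict, stripped the "metrics" field by hand, and converted back to a pydantic model; the new path builds a ChatCompletionResponse directly, reading content.text because the passthrough client appears to return the message content as a structured object with a .text attribute rather than a plain string. A minimal sketch of the equivalent conversion, with the standalone function name being illustrative rather than part of the commit:

# Sketch of the conversion the new code performs inside chat_completion;
# assumes `response` is the passthrough client's return value, whose
# completion_message.content is an object exposing a .text attribute.
from llama_stack.apis.inference import ChatCompletionResponse, CompletionMessage


def _to_chat_completion_response(response) -> ChatCompletionResponse:
    # Rebuild the response model field by field; extracting .text yields a
    # plain string, and constructing the model directly drops the server-side
    # metrics that the old dict-based path had to strip by hand.
    return ChatCompletionResponse(
        completion_message=CompletionMessage(
            content=response.completion_message.content.text,
            stop_reason=response.completion_message.stop_reason,
            tool_calls=response.completion_message.tool_calls,
        ),
        logprobs=response.logprobs,
    )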