forked from phoenix/litellm-mirror
fix(vertex_httpx.py): add better debug logging for vertex httpx
This commit is contained in:
parent d880fb2619
commit aef5cf3f22
1 changed file with 52 additions and 41 deletions
vertex_httpx.py
@@ -366,54 +366,65 @@ class VertexLLM(BaseLLM):
         ## GET MODEL ##
         model_response.model = model
 
-        ## GET TEXT ##
-        chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
-        content_str = ""
-        tools: List[ChatCompletionToolCallChunk] = []
+        try:
+            ## GET TEXT ##
+            chat_completion_message: ChatCompletionResponseMessage = {
+                "role": "assistant"
+            }
+            content_str = ""
+            tools: List[ChatCompletionToolCallChunk] = []
 
-        for idx, candidate in enumerate(completion_response["candidates"]):
-            if "content" not in candidate:
-                continue
+            for idx, candidate in enumerate(completion_response["candidates"]):
+                if "content" not in candidate:
+                    continue
 
-            if "text" in candidate["content"]["parts"][0]:
-                content_str = candidate["content"]["parts"][0]["text"]
+                if "text" in candidate["content"]["parts"][0]:
+                    content_str = candidate["content"]["parts"][0]["text"]
 
-            if "functionCall" in candidate["content"]["parts"][0]:
-                _function_chunk = ChatCompletionToolCallFunctionChunk(
-                    name=candidate["content"]["parts"][0]["functionCall"]["name"],
-                    arguments=json.dumps(
-                        candidate["content"]["parts"][0]["functionCall"]["args"]
-                    ),
-                )
-                _tool_response_chunk = ChatCompletionToolCallChunk(
-                    id=f"call_{str(uuid.uuid4())}",
-                    type="function",
-                    function=_function_chunk,
-                )
-                tools.append(_tool_response_chunk)
+                if "functionCall" in candidate["content"]["parts"][0]:
+                    _function_chunk = ChatCompletionToolCallFunctionChunk(
+                        name=candidate["content"]["parts"][0]["functionCall"]["name"],
+                        arguments=json.dumps(
+                            candidate["content"]["parts"][0]["functionCall"]["args"]
+                        ),
+                    )
+                    _tool_response_chunk = ChatCompletionToolCallChunk(
+                        id=f"call_{str(uuid.uuid4())}",
+                        type="function",
+                        function=_function_chunk,
+                    )
+                    tools.append(_tool_response_chunk)
 
-            chat_completion_message["content"] = content_str
-            chat_completion_message["tool_calls"] = tools
+                chat_completion_message["content"] = content_str
+                chat_completion_message["tool_calls"] = tools
 
-            choice = litellm.Choices(
-                finish_reason=candidate.get("finishReason", "stop"),
-                index=candidate.get("index", idx),
-                message=chat_completion_message,  # type: ignore
-                logprobs=None,
-                enhancements=None,
-            )
+                choice = litellm.Choices(
+                    finish_reason=candidate.get("finishReason", "stop"),
+                    index=candidate.get("index", idx),
+                    message=chat_completion_message,  # type: ignore
+                    logprobs=None,
+                    enhancements=None,
+                )
 
-            model_response.choices.append(choice)
+                model_response.choices.append(choice)
 
-        ## GET USAGE ##
-        usage = litellm.Usage(
-            prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
-            completion_tokens=completion_response["usageMetadata"][
-                "candidatesTokenCount"
-            ],
-            total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
-        )
+            ## GET USAGE ##
+            usage = litellm.Usage(
+                prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
+                completion_tokens=completion_response["usageMetadata"][
+                    "candidatesTokenCount"
+                ],
+                total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
+            )
 
-        setattr(model_response, "usage", usage)
+            setattr(model_response, "usage", usage)
+        except Exception as e:
+            raise VertexAIError(
+                message="Received={}, Error converting to valid response block={}. File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format(
+                    completion_response, str(e)
+                ),
+                status_code=422,
+            )
 
         return model_response
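For context, the logic this hunk wraps converts a raw Vertex AI / Gemini completion_response into OpenAI-style choices. Below is a minimal, runnable sketch of that mapping, using plain dicts in place of litellm's ChatCompletionResponseMessage, ChatCompletionToolCallChunk, and Choices types; the sample response is made up for illustration, though its field names match the diff above.

import json
import uuid

# Illustrative Vertex AI response shape; field names follow the diff above,
# but the values are invented for this example.
completion_response = {
    "candidates": [
        {
            "content": {
                "parts": [
                    {
                        "functionCall": {
                            "name": "get_weather",
                            "args": {"location": "Boston"},
                        },
                    }
                ]
            },
            "finishReason": "STOP",
            "index": 0,
        }
    ],
    "usageMetadata": {
        "promptTokenCount": 10,
        "candidatesTokenCount": 5,
        "totalTokenCount": 15,
    },
}

choices = []
for idx, candidate in enumerate(completion_response["candidates"]):
    if "content" not in candidate:
        continue
    part = candidate["content"]["parts"][0]
    message = {"role": "assistant", "content": part.get("text", "")}
    if "functionCall" in part:
        # Vertex returns tool arguments as a dict; OpenAI-style tool calls
        # expect a JSON-encoded string, hence the json.dumps.
        message["tool_calls"] = [
            {
                "id": f"call_{uuid.uuid4()}",
                "type": "function",
                "function": {
                    "name": part["functionCall"]["name"],
                    "arguments": json.dumps(part["functionCall"]["args"]),
                },
            }
        ]
    choices.append(
        {
            "finish_reason": candidate.get("finishReason", "stop"),
            "index": candidate.get("index", idx),
            "message": message,
        }
    )

usage = {
    "prompt_tokens": completion_response["usageMetadata"]["promptTokenCount"],
    "completion_tokens": completion_response["usageMetadata"]["candidatesTokenCount"],
    "total_tokens": completion_response["usageMetadata"]["totalTokenCount"],
}
print(json.dumps({"choices": choices, "usage": usage}, indent=2))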
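The substantive change is the try/except wrapper: any KeyError or IndexError hit while parsing a malformed response is now re-raised as a VertexAIError carrying the raw completion_response, which is the better debug output the commit message refers to. A sketch of the pattern follows, with a hypothetical stand-in for litellm's VertexAIError class.

# Hypothetical stand-in for litellm's VertexAIError, for illustration only.
class VertexAIError(Exception):
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


def parse_response(completion_response: dict) -> dict:
    try:
        # Parsing assumes well-formed keys and may raise KeyError/IndexError.
        part = completion_response["candidates"][0]["content"]["parts"][0]
        return {"role": "assistant", "content": part.get("text", "")}
    except Exception as e:
        # Re-raise with the raw payload attached so failures show exactly
        # what the API returned, not just the parsing error.
        raise VertexAIError(
            status_code=422,
            message=(
                "Received={}, Error converting to valid response block={}. "
                "File an issue if litellm error - "
                "https://github.com/BerriAI/litellm/issues"
            ).format(completion_response, str(e)),
        )


# A malformed response now reports the payload it choked on.
try:
    parse_response({"candidates": []})
except VertexAIError as err:
    print(err.status_code, err.message)

The 422 status mirrors the code chosen in the diff, treating an unparseable upstream response as unprocessable rather than as a generic server error.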