forked from phoenix/litellm-mirror
fix(vertex_httpx.py): add better debug logging for vertex httpx
This commit is contained in:
parent d880fb2619
commit aef5cf3f22
1 changed file with 52 additions and 41 deletions
vertex_httpx.py
@@ -366,54 +366,65 @@ class VertexLLM(BaseLLM):
         ## GET MODEL ##
         model_response.model = model
 
-        ## GET TEXT ##
-        chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
-        content_str = ""
-        tools: List[ChatCompletionToolCallChunk] = []
+        try:
+            ## GET TEXT ##
+            chat_completion_message: ChatCompletionResponseMessage = {
+                "role": "assistant"
+            }
+            content_str = ""
+            tools: List[ChatCompletionToolCallChunk] = []
 
-        for idx, candidate in enumerate(completion_response["candidates"]):
-            if "content" not in candidate:
-                continue
+            for idx, candidate in enumerate(completion_response["candidates"]):
+                if "content" not in candidate:
+                    continue
 
-            if "text" in candidate["content"]["parts"][0]:
-                content_str = candidate["content"]["parts"][0]["text"]
+                if "text" in candidate["content"]["parts"][0]:
+                    content_str = candidate["content"]["parts"][0]["text"]
 
-            if "functionCall" in candidate["content"]["parts"][0]:
-                _function_chunk = ChatCompletionToolCallFunctionChunk(
-                    name=candidate["content"]["parts"][0]["functionCall"]["name"],
-                    arguments=json.dumps(
-                        candidate["content"]["parts"][0]["functionCall"]["args"]
-                    ),
-                )
-                _tool_response_chunk = ChatCompletionToolCallChunk(
-                    id=f"call_{str(uuid.uuid4())}",
-                    type="function",
-                    function=_function_chunk,
-                )
-                tools.append(_tool_response_chunk)
+                if "functionCall" in candidate["content"]["parts"][0]:
+                    _function_chunk = ChatCompletionToolCallFunctionChunk(
+                        name=candidate["content"]["parts"][0]["functionCall"]["name"],
+                        arguments=json.dumps(
+                            candidate["content"]["parts"][0]["functionCall"]["args"]
+                        ),
+                    )
+                    _tool_response_chunk = ChatCompletionToolCallChunk(
+                        id=f"call_{str(uuid.uuid4())}",
+                        type="function",
+                        function=_function_chunk,
+                    )
+                    tools.append(_tool_response_chunk)
 
-            chat_completion_message["content"] = content_str
-            chat_completion_message["tool_calls"] = tools
+                chat_completion_message["content"] = content_str
+                chat_completion_message["tool_calls"] = tools
 
-            choice = litellm.Choices(
-                finish_reason=candidate.get("finishReason", "stop"),
-                index=candidate.get("index", idx),
-                message=chat_completion_message,  # type: ignore
-                logprobs=None,
-                enhancements=None,
-            )
+                choice = litellm.Choices(
+                    finish_reason=candidate.get("finishReason", "stop"),
+                    index=candidate.get("index", idx),
+                    message=chat_completion_message,  # type: ignore
+                    logprobs=None,
+                    enhancements=None,
+                )
 
-            model_response.choices.append(choice)
+                model_response.choices.append(choice)
 
-        ## GET USAGE ##
-        usage = litellm.Usage(
-            prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
-            completion_tokens=completion_response["usageMetadata"][
-                "candidatesTokenCount"
-            ],
-            total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
-        )
+            ## GET USAGE ##
+            usage = litellm.Usage(
+                prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
+                completion_tokens=completion_response["usageMetadata"][
+                    "candidatesTokenCount"
+                ],
+                total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
+            )
 
-        setattr(model_response, "usage", usage)
+            setattr(model_response, "usage", usage)
+        except Exception as e:
+            raise VertexAIError(
+                message="Received={}, Error converting to valid response block={}. File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format(
+                    completion_response, str(e)
+                ),
+                status_code=422,
+            )
 
         return model_response
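For context, the logic this hunk wraps converts a raw Vertex AI / Gemini completion_response into OpenAI-style choices. Below is a minimal, runnable sketch of that mapping, using plain dicts in place of litellm's ChatCompletionResponseMessage, ChatCompletionToolCallChunk, and Choices types; the sample response is made up for illustration, though its field names match the diff above.

import json
import uuid

# Illustrative Vertex AI response shape; field names follow the diff above,
# but the values are invented for this example.
completion_response = {
    "candidates": [
        {
            "content": {
                "parts": [
                    {
                        "functionCall": {
                            "name": "get_weather",
                            "args": {"location": "Boston"},
                        },
                    }
                ]
            },
            "finishReason": "STOP",
            "index": 0,
        }
    ],
    "usageMetadata": {
        "promptTokenCount": 10,
        "candidatesTokenCount": 5,
        "totalTokenCount": 15,
    },
}

choices = []
for idx, candidate in enumerate(completion_response["candidates"]):
    if "content" not in candidate:
        continue
    part = candidate["content"]["parts"][0]
    message = {"role": "assistant", "content": part.get("text", "")}
    if "functionCall" in part:
        # Vertex returns tool arguments as a dict; OpenAI-style tool calls
        # expect a JSON-encoded string, hence the json.dumps.
        message["tool_calls"] = [
            {
                "id": f"call_{uuid.uuid4()}",
                "type": "function",
                "function": {
                    "name": part["functionCall"]["name"],
                    "arguments": json.dumps(part["functionCall"]["args"]),
                },
            }
        ]
    choices.append(
        {
            "finish_reason": candidate.get("finishReason", "stop"),
            "index": candidate.get("index", idx),
            "message": message,
        }
    )

usage = {
    "prompt_tokens": completion_response["usageMetadata"]["promptTokenCount"],
    "completion_tokens": completion_response["usageMetadata"]["candidatesTokenCount"],
    "total_tokens": completion_response["usageMetadata"]["totalTokenCount"],
}
print(json.dumps({"choices": choices, "usage": usage}, indent=2))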
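The substantive change is the try/except wrapper: any KeyError or IndexError hit while parsing a malformed response is now re-raised as a VertexAIError carrying the raw completion_response, which is the better debug output the commit message refers to. A sketch of the pattern follows, with a hypothetical stand-in for litellm's VertexAIError class.

# Hypothetical stand-in for litellm's VertexAIError, for illustration only.
class VertexAIError(Exception):
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


def parse_response(completion_response: dict) -> dict:
    try:
        # Parsing assumes well-formed keys and may raise KeyError/IndexError.
        part = completion_response["candidates"][0]["content"]["parts"][0]
        return {"role": "assistant", "content": part.get("text", "")}
    except Exception as e:
        # Re-raise with the raw payload attached so failures show exactly
        # what the API returned, not just the parsing error.
        raise VertexAIError(
            status_code=422,
            message=(
                "Received={}, Error converting to valid response block={}. "
                "File an issue if litellm error - "
                "https://github.com/BerriAI/litellm/issues"
            ).format(completion_response, str(e)),
        )


# A malformed response now reports the payload it choked on.
try:
    parse_response({"candidates": []})
except VertexAIError as err:
    print(err.status_code, err.message)

The 422 status mirrors the code chosen in the diff, treating an unparseable upstream response as unprocessable rather than as a generic server error.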