fix(utils.py): fix anthropic streaming usage calculation

Fixes https://github.com/BerriAI/litellm/issues/4965
Krrish Dholakia 2024-08-01 14:45:54 -07:00
parent aad0bbb08c
commit ca0a0bed46
3 changed files with 59 additions and 29 deletions
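
The user-facing pattern this touches, as a minimal sketch (the model name and prompt below are placeholders, not taken from the commit): when an Anthropic model is streamed through litellm, the provider-reported token counts should survive into the response rebuilt by stream_chunk_builder instead of being re-counted locally.

    import litellm

    messages = [{"role": "user", "content": "Hello"}]  # placeholder prompt
    chunks = []
    for chunk in litellm.completion(
        model="claude-3-haiku-20240307",  # placeholder Anthropic model name
        messages=messages,
        stream=True,
    ):
        chunks.append(chunk)

    # Rebuild the complete response from the streamed chunks; with this fix the
    # usage reported by the provider (carried on the chunks' hidden params for
    # Anthropic) is picked up instead of falling back to token_counter().
    full_response = litellm.stream_chunk_builder(chunks, messages=messages)
    print(full_response.usage)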

utils.py

@@ -5191,17 +5191,24 @@ def stream_chunk_builder(
     prompt_tokens = 0
     completion_tokens = 0
     for chunk in chunks:
+        usage_chunk: Optional[Usage] = None
         if "usage" in chunk:
-            if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
-            if "completion_tokens" in chunk["usage"]:
-                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0
+            usage_chunk = chunk.usage
+        elif hasattr(chunk, "_hidden_params") and "usage" in chunk._hidden_params:
+            usage_chunk = chunk._hidden_params["usage"]
+        if usage_chunk is not None:
+            if "prompt_tokens" in usage_chunk:
+                prompt_tokens = usage_chunk.get("prompt_tokens", 0) or 0
+            if "completion_tokens" in usage_chunk:
+                completion_tokens = usage_chunk.get("completion_tokens", 0) or 0
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
         )
-    except:  # don't allow this failing to block a complete streaming response from being returned
-        print_verbose(f"token_counter failed, assuming prompt tokens is 0")
+    except (
+        Exception
+    ):  # don't allow this failing to block a complete streaming response from being returned
+        print_verbose("token_counter failed, assuming prompt tokens is 0")
         response["usage"]["prompt_tokens"] = 0
     response["usage"]["completion_tokens"] = completion_tokens or token_counter(
         model=model,
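
Design note: the builder prefers provider-reported usage and only falls back to token_counter when no chunk carried any. The hunk is cut off above mid-statement; as a hedged sketch of what that fallback shape looks like in general, not the verbatim continuation (combined_content is a hypothetical name for the joined streamed text):

    # hedged sketch of the fallback path, not the exact code in utils.py
    prompt_tokens = prompt_tokens or token_counter(model=model, messages=messages)
    completion_tokens = completion_tokens or token_counter(
        model=model,
        text=combined_content,  # combined_content: hypothetical, the concatenated chunk text
    )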