fix(utils.py): fix streaming to not return usage dict

Fixes https://github.com/BerriAI/litellm/issues/3237
2025-04-25 18:54:30 +00:00 · 2024-04-24 08:06:07 -07:00 · 2024-04-24 08:06:07 -07:00 · 48c2c3d78a
commit 48c2c3d78a
parent 70c98617da
24 changed files with 107 additions and 83 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -407,8 +407,10 @@ def mock_completion(
        model_response["created"] = int(time.time())
        model_response["model"] = model

-        model_response.usage = Usage(
-            prompt_tokens=10, completion_tokens=20, total_tokens=30
+        setattr(
+            model_response,
+            "usage",
+            Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30),
        )

        try:
@@ -652,6 +654,7 @@ def completion(
                model
            ]  # update the model to the actual value if an alias has been passed in
        model_response = ModelResponse()
+        setattr(model_response, "usage", litellm.Usage())
        if (
            kwargs.get("azure", False) == True
        ):  # don't remove flag check, to remain backwards compatible for repos like Codium