Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
LiteLLM Minor fixes + improvements (08/03/2024) (#5488)
* fix(internal_user_endpoints.py): set budget_reset_at for /user/update
* fix(vertex_and_google_ai_studio_gemini.py): handle accumulated json. Fixes https://github.com/BerriAI/litellm/issues/5479
* fix(vertex_ai_and_gemini.py): fix assistant message function call when content is not None. Fixes https://github.com/BerriAI/litellm/issues/5490
* fix(proxy_server.py): generic state uuid for okta sso
* fix(lago.py): improve debug logs. Debugging for https://github.com/BerriAI/litellm/issues/5477
* docs(bedrock.md): add bedrock cross-region inferencing to docs
* fix(azure.py): return azure response headers on aembedding call
* feat(azure.py): return azure response headers for `/audio/transcription`
* fix(types/utils.py): standardize deepseek / anthropic prompt caching usage information. Closes https://github.com/BerriAI/litellm/issues/5285
* docs(usage.md): add docs on litellm usage object
* test(test_completion.py): mark flaky test
This commit is contained in: parent 510c024e72, commit 8eb7cb5300
19 changed files with 736 additions and 81 deletions
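The headline change in litellm/utils.py below is the standardized usage object attached to every response. As a quick orientation before the hunks, a hedged sketch of reading it off a completion (the model name is arbitrary, and mock_response is LiteLLM's built-in mocking parameter, so the example runs without an API key):

    # Sketch: reading the standardized usage object from a LiteLLM response.
    import litellm

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello",  # no real API call is made
    )
    print(response.usage)  # Usage(completion_tokens=..., prompt_tokens=..., total_tokens=...)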
@@ -6146,6 +6146,7 @@ def convert_to_model_response_object(
     ] = None,  # used for supporting 'json_schema' on older models
 ):
     received_args = locals()
 
     if _response_headers is not None:
         llm_response_headers = {
             "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
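The dict comprehension at the end of the hunk above namespaces raw provider response headers under an "llm_provider-" prefix. A minimal standalone sketch of that transform (the sample header names and values are hypothetical):

    # Sketch: prefix provider response headers, mirroring the comprehension above.
    _response_headers = {"x-ratelimit-remaining-requests": "99", "x-request-id": "abc123"}
    llm_response_headers = {
        "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
    }
    print(llm_response_headers)
    # -> {'llm_provider-x-ratelimit-remaining-requests': '99', 'llm_provider-x-request-id': 'abc123'}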
@@ -6230,13 +6231,8 @@ def convert_to_model_response_object(
         model_response_object.choices = choice_list
 
         if "usage" in response_object and response_object["usage"] is not None:
-            model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
-            model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
-            model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
-            special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
-            for k, v in response_object["usage"].items():
-                if k not in special_keys:
-                    setattr(model_response_object.usage, k, v)  # type: ignore
+            usage_object = litellm.Usage(**response_object["usage"])
+            setattr(model_response_object, "usage", usage_object)
         if "created" in response_object:
             model_response_object.created = response_object["created"] or int(
                 time.time()
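The net effect of the second hunk: instead of hand-copying the three standard token counts and bolting any extra keys on with setattr, the whole provider usage dict is passed straight to the litellm.Usage constructor, so provider-specific fields such as Anthropic/Deepseek prompt-caching counters ride along in one typed object. A rough sketch, assuming Usage accepts such extra keyword fields (the caching field name is illustrative, not a guaranteed schema):

    # Sketch: one constructor call replaces the manual field-by-field copying.
    # cache_creation_input_tokens is an illustrative Anthropic-style caching
    # counter, assumed (not guaranteed) to be an accepted Usage field.
    import litellm

    raw_usage = {
        "prompt_tokens": 120,
        "completion_tokens": 30,
        "total_tokens": 150,
        "cache_creation_input_tokens": 100,
    }

    usage_object = litellm.Usage(**raw_usage)
    print(usage_object.total_tokens)  # 150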