LiteLLM Minor fixes + improvements (08/03/2024) (#5488)

* fix(internal_user_endpoints.py): set budget_reset_at for /user/update

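A minimal sketch of exercising the fixed endpoint, assuming a locally running proxy; the URL, key, and user values are placeholders, and `max_budget` / `budget_duration` follow the proxy's user-management parameters:

```python
import requests

# Hypothetical proxy URL and admin key -- replace with your own.
PROXY_BASE = "http://localhost:4000"
ADMIN_KEY = "sk-1234"

# After this fix, setting budget_duration via /user/update should also
# populate budget_reset_at, so the user's budget actually resets on schedule.
resp = requests.post(
    f"{PROXY_BASE}/user/update",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json={
        "user_id": "test-user",
        "max_budget": 10.0,
        "budget_duration": "30d",
    },
)
print(resp.json())
```
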
* fix(vertex_and_google_ai_studio_gemini.py): handle accumulated json

Fixes https://github.com/BerriAI/litellm/issues/5479

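Not the library's exact code, but the general shape of the fix: when Gemini streams a function call, the JSON arguments can arrive split across chunks, so the parser has to accumulate text and only parse once the buffer forms valid JSON. An illustrative sketch:

```python
import json

def parse_streamed_function_args(chunks):
    """Accumulate partial JSON across stream chunks and parse it once
    the buffer forms a complete JSON object (illustrative only)."""
    buffer = ""
    for chunk in chunks:
        buffer += chunk
        try:
            return json.loads(buffer)  # buffer is complete JSON -- done
        except json.JSONDecodeError:
            continue  # still accumulating
    return None  # stream ended without valid JSON

# e.g. arguments split mid-string across two chunks:
print(parse_streamed_function_args(['{"location": "Bos', 'ton", "unit": "c"}']))
```
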
* fix(vertex_ai_and_gemini.py): fix assistant message function call when content is not None

Fixes https://github.com/BerriAI/litellm/issues/5490

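The failing case, roughly: an assistant message that carries both text content and a function/tool call at the same time. An illustrative OpenAI-format message of the kind that previously broke translation to the Vertex/Gemini format:

```python
# Illustrative assistant message: content is a string (not None) AND a
# tool call is present -- the combination this fix handles.
assistant_message = {
    "role": "assistant",
    "content": "Let me look that up.",
    "tool_calls": [
        {
            "id": "call_123",
            "type": "function",
            "function": {
                "name": "get_weather",
                "arguments": '{"location": "Boston"}',
            },
        }
    ],
}
```
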
* fix(proxy_server.py): generic state uuid for okta sso

* fix(lago.py): improve debug logs

Debugging for https://github.com/BerriAI/litellm/issues/5477

* docs(bedrock.md): add bedrock cross-region inferencing to docs

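A hedged usage sketch: AWS cross-region inference routes requests through geo-prefixed inference profile IDs (e.g. `us.` / `eu.`), so the Bedrock model string gains that prefix. The exact model ID below is illustrative:

```python
import litellm

# Cross-region inference: note the "us." geo prefix on the inference
# profile ID (the model ID shown here is illustrative).
response = litellm.completion(
    model="bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
    messages=[{"role": "user", "content": "Hello from cross-region inference"}],
)
print(response.choices[0].message.content)
```
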
* fix(azure.py): return azure response headers on aembedding call

* feat(azure.py): return azure response headers for `/audio/transcription`

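Both header changes surface through the same mechanism shown in the diff below: provider headers are re-keyed with an `llm_provider-` prefix. A sketch of reading them after an async embedding call; the `_hidden_params["additional_headers"]` location is an assumption, and the deployment name is a placeholder:

```python
import asyncio
import litellm

async def main():
    response = await litellm.aembedding(
        model="azure/my-embedding-deployment",  # placeholder deployment
        input=["hello world"],
    )
    # Provider headers come back re-keyed with an "llm_provider-" prefix
    # (see the diff below); the _hidden_params location is an assumption.
    headers = response._hidden_params.get("additional_headers", {})
    print(headers.get("llm_provider-x-ratelimit-remaining-requests"))

asyncio.run(main())
```
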
* fix(types/utils.py): standardize deepseek / anthropic prompt caching usage information

Closes https://github.com/BerriAI/litellm/issues/5285

* docs(usage.md): add docs on litellm usage object

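A hedged sketch of what the standardized usage object looks like from the caller's side; the cache-related field names are assumptions and may differ by litellm version:

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "hi"}],
)
usage = response.usage
# Standard OpenAI-style counters:
print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)
# Provider-specific prompt-caching counters live on the same object;
# these field names are assumptions and may vary by version:
print(getattr(usage, "cache_creation_input_tokens", None))
print(getattr(usage, "cache_read_input_tokens", None))
```
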
* test(test_completion.py): mark flaky test
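
For reference, one common way to mark a flaky test, assuming the pytest-rerunfailures plugin (the plugin choice is an assumption; the repo may use a different retry mechanism):

```python
import pytest

# Assumes pytest-rerunfailures is installed: rerun up to 3 times,
# pausing 1s between attempts, before counting the test as failed.
@pytest.mark.flaky(reruns=3, reruns_delay=1)
def test_sometimes_flaky_completion():
    ...
```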
Krish Dholakia · 2024-09-03 21:21:34 -07:00 · committed by GitHub
parent 59042511c9 · commit be3c7b401e
19 changed files with 736 additions and 81 deletions

@@ -6146,6 +6146,7 @@ def convert_to_model_response_object(
         bool
     ] = None,  # used for supporting 'json_schema' on older models
 ):
+    received_args = locals()
     if _response_headers is not None:
         llm_response_headers = {
             "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
@@ -6230,13 +6231,8 @@ def convert_to_model_response_object(
         model_response_object.choices = choice_list
 
         if "usage" in response_object and response_object["usage"] is not None:
-            model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
-            model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
-            model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
-            special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
-            for k, v in response_object["usage"].items():
-                if k not in special_keys:
-                    setattr(model_response_object.usage, k, v)  # type: ignore
+            usage_object = litellm.Usage(**response_object["usage"])
+            setattr(model_response_object, "usage", usage_object)
         if "created" in response_object:
             model_response_object.created = response_object["created"] or int(
                 time.time()