LiteLLM Minor fixes + improvements (08/03/2024) (#5488)

* fix(internal_user_endpoints.py): set budget_reset_at for /user/update
* fix(vertex_and_google_ai_studio_gemini.py): handle accumulated json
  Fixes https://github.com/BerriAI/litellm/issues/5479
* fix(vertex_ai_and_gemini.py): fix assistant message function call when content is not None
  Fixes https://github.com/BerriAI/litellm/issues/5490
* fix(proxy_server.py): generic state uuid for okta sso
* fix(lago.py): improve debug logs
  Debugging for https://github.com/BerriAI/litellm/issues/5477
* docs(bedrock.md): add bedrock cross-region inferencing to docs
* fix(azure.py): return azure response headers on aembedding call
* feat(azure.py): return azure response headers for `/audio/transcription`
* fix(types/utils.py): standardize deepseek / anthropic prompt caching usage information
  Closes https://github.com/BerriAI/litellm/issues/5285
* docs(usage.md): add docs on litellm usage object
* test(test_completion.py): mark flaky test
2024-09-03 21:21:34 -07:00 · 2024-09-03 21:21:34 -07:00 · be3c7b401e
commit be3c7b401e
parent 59042511c9
19 changed files with 736 additions and 81 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3703,6 +3703,7 @@ async def embeddings(
        api_base = hidden_params.get("api_base", None) or ""
        response_cost = hidden_params.get("response_cost", None) or ""
        litellm_call_id = hidden_params.get("litellm_call_id", None) or ""
+        additional_headers: dict = hidden_params.get("additional_headers", {}) or {}

        fastapi_response.headers.update(
            get_custom_headers(
@@ -3715,6 +3716,7 @@ async def embeddings(
                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
                call_id=litellm_call_id,
                request_data=data,
+                **additional_headers,
            )
        )
        await check_response_size_is_safe(response=response)
@@ -4090,6 +4092,7 @@ async def audio_transcriptions(
        api_base = hidden_params.get("api_base", None) or ""
        response_cost = hidden_params.get("response_cost", None) or ""
        litellm_call_id = hidden_params.get("litellm_call_id", None) or ""
+        additional_headers: dict = hidden_params.get("additional_headers", {}) or {}

        fastapi_response.headers.update(
            get_custom_headers(
@@ -4102,6 +4105,7 @@ async def audio_transcriptions(
                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
                call_id=litellm_call_id,
                request_data=data,
+                **additional_headers,
            )
        )

@@ -8019,8 +8023,13 @@ async def google_login(request: Request):
            # SSO providers do not allow stateless verification
            redirect_params = {}
            state = os.getenv("GENERIC_CLIENT_STATE", None)
+
            if state:
                redirect_params["state"] = state
+            elif "okta" in generic_authorization_endpoint:
+                redirect_params["state"] = (
+                    uuid.uuid4().hex
+                )  # set state param for okta - required
            return await generic_sso.get_login_redirect(**redirect_params)  # type: ignore
    elif ui_username is not None:
        # No Google, Microsoft SSO