fix(key_management_endpoints.py): override metadata field value on update (#7008)

* fix(key_management_endpoints.py): override metadata field value on update
  allow user to override tags
* feat(__init__.py): expose new disable_end_user_cost_tracking_prometheus_only metric
  allow disabling end user cost tracking on prometheus - fixes cardinality issue
* fix(litellm_pre_call_utils.py): add key/team level enforced params
  Fixes https://github.com/BerriAI/litellm/issues/6652
* fix(key_management_endpoints.py): allow user to pass in `enforced_params` as a top level param on /key/generate and /key/update
* docs(enterprise.md): add docs on enforcing required params for llm requests
* Add support of Galadriel API (#7005)
* fix(router.py): robust retry after handling
  set retry after time to 0 if >0 healthy deployments. handle base case = 1 deployment
* test(test_router.py): fix test
* feat(bedrock/): add support for 'nova' models
  also adds explicit 'converse/' route for simpler routing
* fix: fix 'supports_pdf_input'
  return if model supports pdf input on get_model_info
* feat(converse_transformation.py): support bedrock pdf input
* docs(document_understanding.md): add document understanding to docs
* fix(litellm_pre_call_utils.py): fix linting error
* fix(init.py): fix passing of bedrock converse models
* feat(bedrock/converse): support 'response_format={"type": "json_object"}'
* fix(converse_handler.py): fix linting error
* fix(base_llm_unit_tests.py): fix test
* fix: fix test
* test: fix test
* test: fix test
* test: remove duplicate test

---------

Co-authored-by: h4n0 <4738254+h4n0@users.noreply.github.com>
2025-04-26 03:04:13 +00:00 · 2024-12-03 23:03:50 -08:00 · 2024-12-03 23:03:50 -08:00 · 6bb934c0ac
commit 6bb934c0ac
parent d558b643be
37 changed files with 1297 additions and 503 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2940,6 +2940,7 @@ class Router:
                remaining_retries=num_retries,
                num_retries=num_retries,
                healthy_deployments=_healthy_deployments,
+                all_deployments=_all_deployments,
            )

            await asyncio.sleep(retry_after)
@@ -2972,6 +2973,7 @@ class Router:
                        remaining_retries=remaining_retries,
                        num_retries=num_retries,
                        healthy_deployments=_healthy_deployments,
+                        all_deployments=_all_deployments,
                    )
                    await asyncio.sleep(_timeout)

@@ -3149,6 +3151,7 @@ class Router:
        remaining_retries: int,
        num_retries: int,
        healthy_deployments: Optional[List] = None,
+        all_deployments: Optional[List] = None,
    ) -> Union[int, float]:
        """
        Calculate back-off, then retry
@@ -3157,10 +3160,14 @@ class Router:
            1. there are healthy deployments in the same model group
            2. there are fallbacks for the completion call
        """
-        if (
+
+        ## base case - single deployment
+        if all_deployments is not None and len(all_deployments) == 1:
+            pass
+        elif (
            healthy_deployments is not None
            and isinstance(healthy_deployments, list)
-            and len(healthy_deployments) > 1
+            and len(healthy_deployments) > 0
        ):
            return 0

@@ -3242,6 +3249,7 @@ class Router:
                remaining_retries=num_retries,
                num_retries=num_retries,
                healthy_deployments=_healthy_deployments,
+                all_deployments=_all_deployments,
            )

            ## LOGGING
@@ -3276,6 +3284,7 @@ class Router:
                        remaining_retries=remaining_retries,
                        num_retries=num_retries,
                        healthy_deployments=_healthy_deployments,
+                        all_deployments=_all_deployments,
                    )
                    time.sleep(_timeout)