LiteLLM Minor Fixes and Improvements (11/09/2024) (#5634)
* fix(caching.py): set ttl for async_increment cache. Fixes an issue where the ttl for the redis client was not being set on increment_cache. Fixes https://github.com/BerriAI/litellm/issues/5609
* fix(caching.py): fix increment cache w/ ttl for the sync increment cache on redis. Fixes https://github.com/BerriAI/litellm/issues/5609
* fix(router.py): support adding a retry policy + allowed fails policy via config.yaml (see the sketch below)
* fix(router.py): don't cooldown single deployments. No point, as there's no other deployment to loadbalance with.
* fix(user_api_key_auth.py): support setting allowed email domains on jwt tokens. Closes https://github.com/BerriAI/litellm/issues/5605
* docs(token_auth.md): add user upsert + allowed email domain to the jwt auth docs
* fix(litellm_pre_call_utils.py): fix dynamic key logging when team id is set. Fixes an issue where key logging would not be set if team metadata was not none.
* fix(secret_managers/main.py): load environment variables correctly. Fixes an issue where os.environ/ values were not being loaded correctly.
* test(test_router.py): fix test
* feat(spend_tracking_utils.py): support logging additional usage params, e.g. prompt caching values for deepseek
* test: fix tests
* test: fix test
* test: fix test
* test: fix test
* test: fix test
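A minimal sketch of what the retry / allowed-fails policies look like programmatically, assuming litellm's RetryPolicy and AllowedFailsPolicy models; the import path and exact field names here are assumptions and are not taken from this commit's diff. The commit's addition is wiring the same settings through the proxy config.yaml rather than only through Python:

```python
from litellm import Router
from litellm.types.router import AllowedFailsPolicy, RetryPolicy  # assumed import path

# Retry rate-limit/timeout errors a few times, never retry auth errors.
retry_policy = RetryPolicy(
    RateLimitErrorRetries=3,
    TimeoutErrorRetries=2,
    AuthenticationErrorRetries=0,
)

# Tolerate many rate-limit failures before a deployment is cooled down,
# but only a few content-policy violations.
allowed_fails_policy = AllowedFailsPolicy(
    RateLimitErrorAllowedFails=100,
    ContentPolicyViolationErrorAllowedFails=3,
)

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ],
    retry_policy=retry_policy,
    allowed_fails_policy=allowed_fails_policy,
)
```

Setting AuthenticationErrorRetries=0 reflects the usual choice that auth errors are not transient and should fail fast.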
Parent: 70100d716b
Commit: 98c34a7e27
25 changed files with 745 additions and 114 deletions
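On the caching.py items above: the described bug is that incrementing a usage counter in Redis did not attach a TTL, so the key never expired. A minimal sketch of the increment-with-TTL pattern using redis-py's asyncio client, purely illustrative and not LiteLLM's actual implementation (the key handling and the ttl check are assumptions):

```python
import redis.asyncio as redis


async def async_increment_with_ttl(
    client: redis.Redis, key: str, value: float, ttl: int
) -> float:
    """Increment a counter and make sure the key expires."""
    new_value = await client.incrbyfloat(key, value)
    # Only attach an expiry if the key doesn't have one yet (ttl == -1),
    # so repeated increments don't keep pushing the window forward.
    if await client.ttl(key) == -1:
        await client.expire(key, ttl)
    return new_value
```

Checking ttl(key) == -1 first keeps the expiry window anchored to the first increment instead of sliding on every call.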
@@ -38,6 +38,8 @@ from litellm.integrations.custom_logger import CustomLogger
 ## 1. router.completion() + router.embeddings()
 ## 2. proxy.completions + proxy.embeddings
 
+litellm.num_retries = 0
+
 
 class CompletionCustomHandler(
     CustomLogger
@@ -401,7 +403,7 @@ async def test_async_chat_azure():
             "rpm": 1800,
         },
     ]
-    router = Router(model_list=model_list) # type: ignore
+    router = Router(model_list=model_list, num_retries=0) # type: ignore
     response = await router.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -413,7 +415,7 @@ async def test_async_chat_azure():
     ) # pre, post, success
     # streaming
     litellm.callbacks = [customHandler_streaming_azure_router]
-    router2 = Router(model_list=model_list) # type: ignore
+    router2 = Router(model_list=model_list, num_retries=0) # type: ignore
     response = await router2.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -443,7 +445,7 @@ async def test_async_chat_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list) # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0) # type: ignore
     try:
         response = await router3.acompletion(
             model="gpt-3.5-turbo",
@@ -505,7 +507,7 @@ async def test_async_embedding_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list) # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0) # type: ignore
     try:
         response = await router3.aembedding(
             model="azure-embedding-model", input=["hello from litellm!"]
@@ -678,22 +680,21 @@ async def test_rate_limit_error_callback():
         pass
 
     with patch.object(
-        customHandler, "log_model_group_rate_limit_error", new=MagicMock()
+        customHandler, "log_model_group_rate_limit_error", new=AsyncMock()
     ) as mock_client:
 
         print(
             f"customHandler.log_model_group_rate_limit_error: {customHandler.log_model_group_rate_limit_error}"
         )
 
-        for _ in range(3):
-            try:
-                _ = await router.acompletion(
-                    model="my-test-gpt",
-                    messages=[{"role": "user", "content": "Hey, how's it going?"}],
-                    litellm_logging_obj=litellm_logging_obj,
-                )
-            except (litellm.RateLimitError, ValueError):
-                pass
+        try:
+            _ = await router.acompletion(
+                model="my-test-gpt",
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                litellm_logging_obj=litellm_logging_obj,
+            )
+        except (litellm.RateLimitError, ValueError):
+            pass
 
         await asyncio.sleep(3)
         mock_client.assert_called_once()
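On the last hunk: the swap from MagicMock to AsyncMock suggests the log_model_group_rate_limit_error hook is awaited on the router's async failure path; awaiting a MagicMock's return value raises TypeError, while AsyncMock returns an awaitable and still records the call for assert_called_once(). A small self-contained illustration of the pattern (hypothetical Handler class, not LiteLLM code):

```python
import asyncio
from unittest.mock import AsyncMock, patch


class Handler:
    async def log_model_group_rate_limit_error(self, exception, original_model_group):
        """Stand-in for the custom-logger hook exercised by the test."""


async def fire(handler: Handler) -> None:
    # The caller awaits the hook, as an async failure path would.
    await handler.log_model_group_rate_limit_error(Exception("429"), "my-test-gpt")


handler = Handler()
with patch.object(
    handler, "log_model_group_rate_limit_error", new=AsyncMock()
) as mock_hook:
    asyncio.run(fire(handler))
    mock_hook.assert_called_once()
# Patching with MagicMock() instead would fail: awaiting its call result raises TypeError.
```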