LiteLLM Minor Fixes and Improvements (11/09/2024) (#5634)

* fix(caching.py): set ttl for async_increment cache

Fixes an issue where the TTL was not being set on the Redis client in increment_cache.

Fixes https://github.com/BerriAI/litellm/issues/5609

* fix(caching.py): also set ttl for the sync increment cache on redis

Fixes https://github.com/BerriAI/litellm/issues/5609
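
Both caching fixes above amount to making sure an expiry is applied when a counter key is incremented. Below is a minimal sketch of the async pattern against redis-py; the function and variable names are illustrative, not the actual caching.py internals, and the sync path is the same calls without await.

```python
from typing import Optional

import redis.asyncio as async_redis


async def increment_with_ttl(
    client: async_redis.Redis, key: str, value: float, ttl: Optional[int]
) -> float:
    # Increment the counter first...
    result = await client.incrbyfloat(name=key, amount=value)
    # ...then make sure the key actually expires; a counter created by
    # INCRBYFLOAT otherwise lives forever and keeps growing across windows.
    if ttl is not None and await client.ttl(key) == -1:  # -1 => key has no expiry
        await client.expire(key, ttl)
    return result
```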

* fix(router.py): support adding retry policy + allowed fails policy via config.yaml
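
For reference, the shape this enables in the proxy's config.yaml looks roughly like the following; the policy field names mirror the RetryPolicy / AllowedFailsPolicy types, but treat the exact keys and values here as an illustrative assumption rather than a verbatim config.

```yaml
router_settings:
  retry_policy:                      # retries per exception type
    TimeoutErrorRetries: 3
    RateLimitErrorRetries: 3
    ContentPolicyViolationErrorRetries: 4
  allowed_fails_policy:              # failures tolerated before cooling a deployment down
    RateLimitErrorAllowedFails: 100
    ContentPolicyViolationErrorAllowedFails: 1000
```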

* fix(router.py): don't cooldown single deployments

There is no point cooling down a single deployment, since there is no other deployment to load-balance to.

* fix(user_api_key_auth.py): support setting allowed email domains on jwt tokens

Closes https://github.com/BerriAI/litellm/issues/5605
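
The enforcement itself is a small check on the validated JWT payload. The following is a hedged sketch of the idea, with illustrative names rather than the actual user_api_key_auth.py helpers.

```python
from typing import List

from fastapi import HTTPException


def enforce_allowed_email_domain(jwt_payload: dict, allowed_domains: List[str]) -> None:
    """Reject tokens whose email claim falls outside the configured domains."""
    email = jwt_payload.get("email", "") or ""
    domain = email.split("@")[-1].lower() if "@" in email else ""
    if domain not in {d.lower() for d in allowed_domains}:
        raise HTTPException(
            status_code=403,
            detail=f"Email domain '{domain}' is not in the allowed list",
        )
```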

* docs(token_auth.md): add user upsert + allowed email domain to jwt auth docs

* fix(litellm_pre_call_utils.py): fix dynamic key logging when team id is set

Fixes an issue where key-level logging settings were not applied if the team metadata was not None (sketched below).
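
In other words, key-level logging config was previously only read in the "no team metadata" branch. A hedged sketch of the corrected merge behavior (names are illustrative, not the actual litellm_pre_call_utils.py code):

```python
def merge_key_and_team_metadata(key_metadata: dict, team_metadata: dict) -> dict:
    # Start from whatever the team defines, then let the key's own logging
    # settings apply on top, instead of skipping them when team metadata exists.
    merged = dict(team_metadata or {})
    if key_metadata and key_metadata.get("logging") is not None:
        merged["logging"] = key_metadata["logging"]
    return merged
```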

* fix(secret_managers/main.py): load environment variables correctly

Fixes an issue where os.environ/-prefixed values were not being resolved correctly (sketched below).
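
The os.environ/ prefix is LiteLLM's convention for pointing a config value at an environment variable instead of inlining the secret. A minimal sketch of the resolution step this fix restores (not the actual secret_managers/main.py code):

```python
import os


def resolve_config_value(value: str) -> str:
    # "os.environ/AZURE_API_KEY" -> contents of the AZURE_API_KEY env var
    if isinstance(value, str) and value.startswith("os.environ/"):
        env_var = value.split("/", 1)[1]
        return os.environ.get(env_var, "")
    return value
```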

* test(test_router.py): fix test

* feat(spend_tracking_utils.py): support logging additional usage params, e.g. prompt-caching values for DeepSeek
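
DeepSeek, for instance, reports prompt_cache_hit_tokens and prompt_cache_miss_tokens alongside the standard token counts; the feature is about carrying such provider-specific usage fields into the spend log instead of dropping them. A hedged sketch (the helper name is illustrative):

```python
STANDARD_USAGE_KEYS = {"prompt_tokens", "completion_tokens", "total_tokens"}


def extract_additional_usage_params(usage: dict) -> dict:
    # Keep anything beyond the standard counts, e.g. DeepSeek's
    # prompt_cache_hit_tokens / prompt_cache_miss_tokens.
    return {
        k: v
        for k, v in usage.items()
        if k not in STANDARD_USAGE_KEYS and v is not None
    }
```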

* test: fix tests

* test: fix test

* test: fix test

* test: fix test

* test: fix test
Krish Dholakia, 2024-09-11 22:36:06 -07:00 (committed by GitHub)
commit 98c34a7e27, parent 70100d716b
25 changed files with 745 additions and 114 deletions


@@ -38,6 +38,8 @@ from litellm.integrations.custom_logger import CustomLogger
 ## 1. router.completion() + router.embeddings()
 ## 2. proxy.completions + proxy.embeddings
+litellm.num_retries = 0
 class CompletionCustomHandler(
     CustomLogger
@@ -401,7 +403,7 @@ async def test_async_chat_azure():
             "rpm": 1800,
         },
     ]
-    router = Router(model_list=model_list)  # type: ignore
+    router = Router(model_list=model_list, num_retries=0)  # type: ignore
     response = await router.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -413,7 +415,7 @@ async def test_async_chat_azure():
     )  # pre, post, success
     # streaming
     litellm.callbacks = [customHandler_streaming_azure_router]
-    router2 = Router(model_list=model_list)  # type: ignore
+    router2 = Router(model_list=model_list, num_retries=0)  # type: ignore
     response = await router2.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -443,7 +445,7 @@ async def test_async_chat_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list)  # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0)  # type: ignore
     try:
         response = await router3.acompletion(
             model="gpt-3.5-turbo",
@@ -505,7 +507,7 @@ async def test_async_embedding_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list)  # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0)  # type: ignore
     try:
         response = await router3.aembedding(
             model="azure-embedding-model", input=["hello from litellm!"]
@@ -678,22 +680,21 @@ async def test_rate_limit_error_callback():
         pass
     with patch.object(
-        customHandler, "log_model_group_rate_limit_error", new=MagicMock()
+        customHandler, "log_model_group_rate_limit_error", new=AsyncMock()
     ) as mock_client:
         print(
             f"customHandler.log_model_group_rate_limit_error: {customHandler.log_model_group_rate_limit_error}"
         )
-        for _ in range(3):
-            try:
-                _ = await router.acompletion(
-                    model="my-test-gpt",
-                    messages=[{"role": "user", "content": "Hey, how's it going?"}],
-                    litellm_logging_obj=litellm_logging_obj,
-                )
-            except (litellm.RateLimitError, ValueError):
-                pass
+        try:
+            _ = await router.acompletion(
+                model="my-test-gpt",
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                litellm_logging_obj=litellm_logging_obj,
+            )
+        except (litellm.RateLimitError, ValueError):
+            pass
         await asyncio.sleep(3)
         mock_client.assert_called_once()
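
A note on the MagicMock → AsyncMock swap in the last hunk: presumably the patched callback is awaited on the router's async failure path, and awaiting the return value of a plain MagicMock raises a TypeError, so the replacement mock has to be awaitable. A self-contained illustration (the Handler class here is made up for the example):

```python
import asyncio
from unittest.mock import AsyncMock, patch


class Handler:
    async def log_rate_limit_error(self, exc: Exception) -> None:
        ...  # stand-in for an async callback such as log_model_group_rate_limit_error


async def main() -> None:
    handler = Handler()
    with patch.object(handler, "log_rate_limit_error", new=AsyncMock()) as mock_cb:
        # Awaiting an AsyncMock works; with new=MagicMock() this line would raise
        # "TypeError: object MagicMock can't be used in 'await' expression".
        await handler.log_rate_limit_error(ValueError("rate limited"))
        mock_cb.assert_awaited_once()


asyncio.run(main())
```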