LiteLLM Minor Fixes and Improvements (11/09/2024) (#5634)
* fix(caching.py): set ttl for async_increment cache. Fixes an issue where the ttl for the redis client was not being set on increment_cache. Fixes https://github.com/BerriAI/litellm/issues/5609
* fix(caching.py): fix increment cache w/ ttl for the sync increment cache on redis. Fixes https://github.com/BerriAI/litellm/issues/5609
* fix(router.py): support adding a retry policy + allowed fails policy via config.yaml (see the sketch below)
* fix(router.py): don't cooldown single deployments. No point, as there's no other deployment to loadbalance with.
* fix(user_api_key_auth.py): support setting allowed email domains on jwt tokens. Closes https://github.com/BerriAI/litellm/issues/5605
* docs(token_auth.md): add user upsert + allowed email domain to the jwt auth docs
* fix(litellm_pre_call_utils.py): fix dynamic key logging when team id is set. Fixes an issue where key logging would not be set if team metadata was not none.
* fix(secret_managers/main.py): load environment variables correctly. Fixes an issue where os.environ/ values were not being loaded correctly.
* test(test_router.py): fix test
* feat(spend_tracking_utils.py): support logging additional usage params, e.g. prompt caching values for deepseek
* test: fix tests
* test: fix test
* test: fix test
* test: fix test
* test: fix test
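A minimal sketch of what the retry / allowed-fails policies look like programmatically, assuming litellm's RetryPolicy and AllowedFailsPolicy models; the import path and exact field names here are assumptions and are not taken from this commit's diff. The commit's addition is wiring the same settings through the proxy config.yaml rather than only through Python:

```python
from litellm import Router
from litellm.types.router import AllowedFailsPolicy, RetryPolicy  # assumed import path

# Retry rate-limit/timeout errors a few times, never retry auth errors.
retry_policy = RetryPolicy(
    RateLimitErrorRetries=3,
    TimeoutErrorRetries=2,
    AuthenticationErrorRetries=0,
)

# Tolerate many rate-limit failures before a deployment is cooled down,
# but only a few content-policy violations.
allowed_fails_policy = AllowedFailsPolicy(
    RateLimitErrorAllowedFails=100,
    ContentPolicyViolationErrorAllowedFails=3,
)

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ],
    retry_policy=retry_policy,
    allowed_fails_policy=allowed_fails_policy,
)
```

Setting AuthenticationErrorRetries=0 reflects the usual choice that auth errors are not transient and should fail fast.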
Parent: 70100d716b
Commit: 98c34a7e27
25 changed files with 745 additions and 114 deletions
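On the caching.py items above: the described bug is that incrementing a usage counter in Redis did not attach a TTL, so the key never expired. A minimal sketch of the increment-with-TTL pattern using redis-py's asyncio client, purely illustrative and not LiteLLM's actual implementation (the key handling and the ttl check are assumptions):

```python
import redis.asyncio as redis


async def async_increment_with_ttl(
    client: redis.Redis, key: str, value: float, ttl: int
) -> float:
    """Increment a counter and make sure the key expires."""
    new_value = await client.incrbyfloat(key, value)
    # Only attach an expiry if the key doesn't have one yet (ttl == -1),
    # so repeated increments don't keep pushing the window forward.
    if await client.ttl(key) == -1:
        await client.expire(key, ttl)
    return new_value
```

Checking ttl(key) == -1 first keeps the expiry window anchored to the first increment instead of sliding on every call.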
@@ -38,6 +38,8 @@ from litellm.integrations.custom_logger import CustomLogger
 ## 1. router.completion() + router.embeddings()
 ## 2. proxy.completions + proxy.embeddings
 
+litellm.num_retries = 0
+
 
 class CompletionCustomHandler(
     CustomLogger
@@ -401,7 +403,7 @@ async def test_async_chat_azure():
             "rpm": 1800,
         },
     ]
-    router = Router(model_list=model_list) # type: ignore
+    router = Router(model_list=model_list, num_retries=0) # type: ignore
     response = await router.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -413,7 +415,7 @@ async def test_async_chat_azure():
     ) # pre, post, success
     # streaming
     litellm.callbacks = [customHandler_streaming_azure_router]
-    router2 = Router(model_list=model_list) # type: ignore
+    router2 = Router(model_list=model_list, num_retries=0) # type: ignore
     response = await router2.acompletion(
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
@@ -443,7 +445,7 @@ async def test_async_chat_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list) # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0) # type: ignore
     try:
         response = await router3.acompletion(
             model="gpt-3.5-turbo",
@@ -505,7 +507,7 @@ async def test_async_embedding_azure():
         },
     ]
     litellm.callbacks = [customHandler_failure]
-    router3 = Router(model_list=model_list) # type: ignore
+    router3 = Router(model_list=model_list, num_retries=0) # type: ignore
     try:
         response = await router3.aembedding(
             model="azure-embedding-model", input=["hello from litellm!"]
@@ -678,22 +680,21 @@ async def test_rate_limit_error_callback():
         pass
 
     with patch.object(
-        customHandler, "log_model_group_rate_limit_error", new=MagicMock()
+        customHandler, "log_model_group_rate_limit_error", new=AsyncMock()
     ) as mock_client:
 
         print(
             f"customHandler.log_model_group_rate_limit_error: {customHandler.log_model_group_rate_limit_error}"
         )
 
-        for _ in range(3):
-            try:
-                _ = await router.acompletion(
-                    model="my-test-gpt",
-                    messages=[{"role": "user", "content": "Hey, how's it going?"}],
-                    litellm_logging_obj=litellm_logging_obj,
-                )
-            except (litellm.RateLimitError, ValueError):
-                pass
+        try:
+            _ = await router.acompletion(
+                model="my-test-gpt",
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                litellm_logging_obj=litellm_logging_obj,
+            )
+        except (litellm.RateLimitError, ValueError):
+            pass
 
         await asyncio.sleep(3)
         mock_client.assert_called_once()
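On the last hunk: the swap from MagicMock to AsyncMock suggests the log_model_group_rate_limit_error hook is awaited on the router's async failure path; awaiting a MagicMock's return value raises TypeError, while AsyncMock returns an awaitable and still records the call for assert_called_once(). A small self-contained illustration of the pattern (hypothetical Handler class, not LiteLLM code):

```python
import asyncio
from unittest.mock import AsyncMock, patch


class Handler:
    async def log_model_group_rate_limit_error(self, exception, original_model_group):
        """Stand-in for the custom-logger hook exercised by the test."""


async def fire(handler: Handler) -> None:
    # The caller awaits the hook, as an async failure path would.
    await handler.log_model_group_rate_limit_error(Exception("429"), "my-test-gpt")


handler = Handler()
with patch.object(
    handler, "log_model_group_rate_limit_error", new=AsyncMock()
) as mock_hook:
    asyncio.run(fire(handler))
    mock_hook.assert_called_once()
# Patching with MagicMock() instead would fail: awaiting its call result raises TypeError.
```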