mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
LiteLLM Minor Fixes & Improvements (09/21/2024) (#5819)
* fix(router.py): fix error message
* Litellm disable keys (#5814)
* build(schema.prisma): allow blocking/unblocking keys
  Fixes https://github.com/BerriAI/litellm/issues/5328
* fix(key_management_endpoints.py): fix pop
* feat(auth_checks.py): allow admin to enable/disable virtual keys
  Closes https://github.com/BerriAI/litellm/issues/5328
* docs(vertex.md): add auth section for vertex ai
  Addresses - https://github.com/BerriAI/litellm/issues/5768#issuecomment-2365284223
* build(model_prices_and_context_window.json): show which models support prompt_caching
  Closes https://github.com/BerriAI/litellm/issues/5776
* fix(router.py): allow setting default priority for requests
* fix(router.py): add 'retry-after' header for concurrent request limit errors
  Fixes https://github.com/BerriAI/litellm/issues/5783
* fix(router.py): correctly raise and use retry-after header from azure+openai
  Fixes https://github.com/BerriAI/litellm/issues/5783
* fix(user_api_key_auth.py): fix valid token being none
* fix(auth_checks.py): fix model dump for cache management object
* fix(user_api_key_auth.py): pass prisma_client to obj
* test(test_otel.py): update test for new key check
* test: fix test
This commit is contained in:
parent
1ca638973f
commit
8039b95aaf
25 changed files with 1006 additions and 182 deletions
|
@ -910,6 +910,7 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
|
|||
{"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
|
||||
```
|
||||
"""
|
||||
print(f"Received args: {locals()}")
|
||||
import openai
|
||||
|
||||
if sync_mode:
|
||||
|
@ -939,13 +940,38 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
|
|||
cooldown_time = 30.0
|
||||
|
||||
def _return_exception(*args, **kwargs):
|
||||
from fastapi import HTTPException
|
||||
import datetime
|
||||
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail="Rate Limited!",
|
||||
headers={"retry-after": cooldown_time}, # type: ignore
|
||||
)
|
||||
from httpx import Headers, Request, Response
|
||||
|
||||
kwargs = {
|
||||
"request": Request("POST", "https://www.google.com"),
|
||||
"message": "Error code: 429 - Rate Limit Error!",
|
||||
"body": {"detail": "Rate Limit Error!"},
|
||||
"code": None,
|
||||
"param": None,
|
||||
"type": None,
|
||||
"response": Response(
|
||||
status_code=429,
|
||||
headers=Headers(
|
||||
{
|
||||
"date": "Sat, 21 Sep 2024 22:56:53 GMT",
|
||||
"server": "uvicorn",
|
||||
"retry-after": "30",
|
||||
"content-length": "30",
|
||||
"content-type": "application/json",
|
||||
}
|
||||
),
|
||||
request=Request("POST", "http://0.0.0.0:9000/chat/completions"),
|
||||
),
|
||||
"status_code": 429,
|
||||
"request_id": None,
|
||||
}
|
||||
|
||||
exception = Exception()
|
||||
for k, v in kwargs.items():
|
||||
setattr(exception, k, v)
|
||||
raise exception
|
||||
|
||||
with patch.object(
|
||||
mapped_target,
|
||||
|
@ -975,7 +1001,7 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
|
|||
except litellm.RateLimitError as e:
|
||||
exception_raised = True
|
||||
assert e.litellm_response_headers is not None
|
||||
assert e.litellm_response_headers["retry-after"] == cooldown_time
|
||||
assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
|
||||
|
||||
if exception_raised is False:
|
||||
print(resp)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue