LiteLLM Minor Fixes & Improvements (09/21/2024) (#5819)

* fix(router.py): fix error message

* Litellm disable keys (#5814)

* build(schema.prisma): allow blocking/unblocking keys

Fixes https://github.com/BerriAI/litellm/issues/5328

* fix(key_management_endpoints.py): fix pop

* feat(auth_checks.py): allow admin to enable/disable virtual keys

Closes https://github.com/BerriAI/litellm/issues/5328
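
For reference, a minimal sketch of how an admin might flip a key's blocked state against the proxy's management API. The `/key/block` and `/key/unblock` routes, the request shape, and the blocked column implied by the schema.prisma change are assumptions drawn from the commit messages above, not confirmed signatures:

```python
import requests

PROXY_BASE_URL = "http://localhost:4000"  # hypothetical local proxy
ADMIN_KEY = "sk-1234"  # hypothetical admin (master) key

def set_key_blocked(token: str, blocked: bool) -> dict:
    """Block or unblock a virtual key without deleting it."""
    route = "/key/block" if blocked else "/key/unblock"  # assumed routes
    resp = requests.post(
        f"{PROXY_BASE_URL}{route}",
        headers={"Authorization": f"Bearer {ADMIN_KEY}"},
        json={"key": token},  # assumed request body
    )
    resp.raise_for_status()
    return resp.json()

# e.g. disable a leaked key, then re-enable it later
set_key_blocked("sk-abc123", blocked=True)
```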

* docs(vertex.md): add auth section for vertex ai

Addresses - https://github.com/BerriAI/litellm/issues/5768#issuecomment-2365284223
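
A hedged sketch of the per-request auth flow the docs section presumably describes, using litellm's `vertex_credentials` / `vertex_project` / `vertex_location` completion parameters (the file path, project, and location values are placeholders):

```python
import json

import litellm

# Pass service-account credentials per request instead of relying on
# an ambient GOOGLE_APPLICATION_CREDENTIALS environment variable.
with open("/path/to/service_account.json") as f:
    vertex_credentials = json.dumps(json.load(f))

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",
    messages=[{"role": "user", "content": "hello"}],
    vertex_credentials=vertex_credentials,
    vertex_project="my-gcp-project",
    vertex_location="us-central1",
)
```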

* build(model_prices_and_context_window.json): show which models support prompt_caching

Closes https://github.com/BerriAI/litellm/issues/5776
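
With the flag in the pricing map, callers can presumably check it through `litellm.get_model_info`; the exact field name (`supports_prompt_caching`) is inferred from the commit message:

```python
import litellm

info = litellm.get_model_info("claude-3-5-sonnet-20240620")
print(info.get("supports_prompt_caching", False))  # assumed field name
```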

* fix(router.py): allow setting default priority for requests
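
A minimal sketch, assuming the new setting is a Router keyword argument named `default_priority` that feeds the request scheduler unless a call overrides it:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ],
    default_priority=0,  # assumed keyword added by this change
)
```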

* fix(router.py): add 'retry-after' header for concurrent request limit errors

Fixes https://github.com/BerriAI/litellm/issues/5783

* fix(router.py): correctly raise and use retry-after header from azure+openai

Fixes https://github.com/BerriAI/litellm/issues/5783
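
With these fixes, callers should be able to read the provider's retry-after off the mapped exception; a rough sketch (the header access pattern is assumed):

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.RateLimitError as e:
    headers = getattr(e, "headers", None) or {}
    retry_after = headers.get("retry-after")  # surfaced per the fixes above
    print(f"rate limited; retry after {retry_after}s")
```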

* fix(user_api_key_auth.py): fix valid token being none

* fix(auth_checks.py): fix model dump for cache management object

* fix(user_api_key_auth.py): pass prisma_client to obj

* test(test_otel.py): update test for new key check

* test: fix test

Krish Dholakia, 2024-09-21 18:51:53 -07:00, committed by GitHub
parent f0543a6f9d
commit f3fa2160a0
25 changed files with 1006 additions and 182 deletions
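
Every hunk below applies the same recurring pattern to the OpenAI handlers: read the status code, message text, and headers off the raised exception, fall back to the attached response object for headers, and re-raise as OpenAIError. As a standalone sketch (the helper name is illustrative, not an actual litellm function):

```python
def _extract_error_fields(e: Exception):
    """Illustrative helper mirroring the repeated pattern in the diff:
    prefer headers set on the exception itself, and fall back to the
    attached response object (e.g. an httpx.Response) when present."""
    status_code = getattr(e, "status_code", 500)
    error_text = getattr(e, "text", str(e))
    error_headers = getattr(e, "headers", None)
    error_response = getattr(e, "response", None)
    if error_headers is None and error_response is not None:
        error_headers = getattr(error_response, "headers", None)
    return status_code, error_text, error_headers
```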


@@ -704,7 +704,6 @@ class OpenAIChatCompletion(BaseLLM):
         drop_params: Optional[bool] = None,
     ):
         super().completion()
-        exception_mapping_worked = False
         try:
             if headers:
                 optional_params["extra_headers"] = headers
@@ -911,6 +910,9 @@ class OpenAIChatCompletion(BaseLLM):
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
                 status_code=status_code, message=error_text, headers=error_headers
             )
@@ -1003,8 +1005,12 @@ class OpenAIChatCompletion(BaseLLM):
                 raise e
             # e.message
         except Exception as e:
+            exception_response = getattr(e, "response", None)
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            if error_headers is None and exception_response:
+                error_headers = getattr(exception_response, "headers", None)
+
             raise OpenAIError(
                 status_code=status_code, message=str(e), headers=error_headers
             )
@@ -1144,10 +1150,13 @@ class OpenAIChatCompletion(BaseLLM):
                 raise e
             error_headers = getattr(e, "headers", None)
             status_code = getattr(e, "status_code", 500)
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             if response is not None and hasattr(response, "text"):
                 error_headers = getattr(e, "headers", None)
                 raise OpenAIError(
-                    status_code=500,
+                    status_code=status_code,
                     message=f"{str(e)}\n\nOriginal Response: {response.text}",  # type: ignore
                     headers=error_headers,
                 )
@@ -1272,8 +1281,12 @@ class OpenAIChatCompletion(BaseLLM):
             )
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
 
     def embedding(  # type: ignore
@@ -1352,8 +1365,12 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
 
     async def aimage_generation(
@@ -1774,7 +1791,15 @@ class OpenAITextCompletion(BaseLLM):
             ## RESPONSE OBJECT
             return TextCompletionResponse(**response_json)
         except Exception as e:
-            raise e
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     async def acompletion(
         self,
@@ -1825,7 +1850,15 @@ class OpenAITextCompletion(BaseLLM):
             response_obj._hidden_params.original_response = json.dumps(response_json)
             return response_obj
         except Exception as e:
-            raise e
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     def streaming(
         self,
@@ -1860,8 +1893,12 @@ class OpenAITextCompletion(BaseLLM):
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
         streamwrapper = CustomStreamWrapper(
             completion_stream=response,
@@ -1871,8 +1908,19 @@ class OpenAITextCompletion(BaseLLM):
             stream_options=data.get("stream_options", None),
         )
-        for chunk in streamwrapper:
-            yield chunk
+        try:
+            for chunk in streamwrapper:
+                yield chunk
+        except Exception as e:
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     async def async_streaming(
         self,
@@ -1910,8 +1958,19 @@ class OpenAITextCompletion(BaseLLM):
             stream_options=data.get("stream_options", None),
         )
-        async for transformed_chunk in streamwrapper:
-            yield transformed_chunk
+        try:
+            async for transformed_chunk in streamwrapper:
+                yield transformed_chunk
+        except Exception as e:
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
 
 class OpenAIFilesAPI(BaseLLM):