forked from phoenix/litellm-mirror

Litellm dev 10 26 2024 (#6472)

* docs(exception_mapping.md): add missing exception types

  Fixes https://github.com/Aider-AI/aider/issues/2120#issuecomment-2438971183

* fix(main.py): register custom model pricing with specific key

  Ensure custom model pricing is registered to the specific model+provider key combination

* test: make testing more robust for custom pricing

* fix(redis_cache.py): instrument otel logging for sync redis calls

  Ensures complete coverage for all redis cache calls

This commit is contained in:
parent f44ab00de2
commit 70111a7abd

9 changed files with 310 additions and 72 deletions
docs/my-website/docs/exception_mapping.md

```diff
@@ -2,18 +2,33 @@
 LiteLLM maps exceptions across all providers to their OpenAI counterparts.
 
-| Status Code | Error Type |
-|-------------|--------------------------|
-| 400 | BadRequestError |
-| 401 | AuthenticationError |
-| 403 | PermissionDeniedError |
-| 404 | NotFoundError |
-| 422 | UnprocessableEntityError |
-| 429 | RateLimitError |
-| >=500 | InternalServerError |
-| N/A | ContextWindowExceededError |
-| 400 | ContentPolicyViolationError |
-| 500 | APIConnectionError |
+All exceptions can be imported from `litellm` - e.g. `from litellm import BadRequestError`
+
+## LiteLLM Exceptions
+
+| Status Code | Error Type | Inherits from | Description |
+|-------------|------------|---------------|-------------|
+| 400 | BadRequestError | openai.BadRequestError | |
+| 400 | UnsupportedParamsError | litellm.BadRequestError | Raised when unsupported params are passed |
+| 400 | ContextWindowExceededError | litellm.BadRequestError | Special error type for context window exceeded error messages - enables context window fallbacks |
+| 400 | ContentPolicyViolationError | litellm.BadRequestError | Special error type for content policy violation error messages - enables content policy fallbacks |
+| 400 | InvalidRequestError | openai.BadRequestError | Deprecated - use BadRequestError instead |
+| 401 | AuthenticationError | openai.AuthenticationError | |
+| 403 | PermissionDeniedError | openai.PermissionDeniedError | |
+| 404 | NotFoundError | openai.NotFoundError | Raised when an invalid model is passed, e.g. gpt-8 |
+| 408 | Timeout | openai.APITimeoutError | Raised when a request times out |
+| 422 | UnprocessableEntityError | openai.UnprocessableEntityError | |
+| 429 | RateLimitError | openai.RateLimitError | |
+| 500 | APIConnectionError | openai.APIConnectionError | If any unmapped error is returned, this error is raised |
+| 500 | APIError | openai.APIError | Generic 500-status code error |
+| 503 | ServiceUnavailableError | openai.APIStatusError | Raised when the provider returns a service unavailable error |
+| >=500 | InternalServerError | openai.InternalServerError | Raised for any unmapped 500-status code error |
+| N/A | APIResponseValidationError | openai.APIResponseValidationError | Raised when Rules are used and a request/response fails a rule |
+| N/A | BudgetExceededError | Exception | Raised by the proxy when a budget is exceeded |
+| N/A | JSONSchemaValidationError | litellm.APIResponseValidationError | Raised when a response does not match the expected JSON schema - used if the `response_schema` param is passed with `enforce_validation=True` |
+| N/A | MockException | Exception | Internal exception raised by the mock_completion class. Do not use directly |
+| N/A | OpenAIError | openai.OpenAIError | Deprecated internal exception; inherits from openai.OpenAIError |
 
 Base case we return APIConnectionError
```
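To make the documented table concrete, here is a minimal sketch of catching these mapped exceptions. The model name and prompt are placeholders; the point is only the import path and the handler ordering (catch `ContextWindowExceededError` before its parent `BadRequestError`):

```python
import litellm
from litellm import BadRequestError, ContextWindowExceededError, RateLimitError

try:
    response = litellm.completion(
        model="gpt-3.5-turbo",  # placeholder model
        messages=[{"role": "user", "content": "Hello"}],
    )
except ContextWindowExceededError:
    # Subclass of litellm.BadRequestError - routers use this
    # signal for context window fallbacks, per the table above.
    print("prompt too long - retry with a larger-context model")
except RateLimitError:
    # Mapped from any provider's 429 response.
    print("rate limited - back off and retry")
except BadRequestError as e:
    # Any other 400-status error.
    print(f"bad request: {e}")
```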
```diff
@@ -1,3 +1,4 @@
+import asyncio
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Optional, Union
 
@@ -32,14 +33,63 @@ class ServiceLogging(CustomLogger):
         self.prometheusServicesLogger = PrometheusServicesLogger()
 
     def service_success_hook(
-        self, service: ServiceTypes, duration: float, call_type: str
+        self,
+        service: ServiceTypes,
+        duration: float,
+        call_type: str,
+        parent_otel_span: Optional[Span] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         """
-        [TODO] Not implemented for sync calls yet. V0 is focused on async monitoring (used by proxy).
+        Handles both sync and async monitoring by checking for an existing event loop.
         """
+        # if service == ServiceTypes.REDIS:
+        #     print(f"SYNC service: {service}, call_type: {call_type}")
         if self.mock_testing:
             self.mock_testing_sync_success_hook += 1
 
+        try:
+            # Try to get the current event loop
+            loop = asyncio.get_event_loop()
+            # Check if the loop is running
+            if loop.is_running():
+                # If we're in a running loop, create a task
+                loop.create_task(
+                    self.async_service_success_hook(
+                        service=service,
+                        duration=duration,
+                        call_type=call_type,
+                        parent_otel_span=parent_otel_span,
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
+                )
+            else:
+                # Loop exists but is not running - we can use run_until_complete
+                loop.run_until_complete(
+                    self.async_service_success_hook(
+                        service=service,
+                        duration=duration,
+                        call_type=call_type,
+                        parent_otel_span=parent_otel_span,
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
+                )
+        except RuntimeError:
+            # No event loop exists - create a new one and run
+            asyncio.run(
+                self.async_service_success_hook(
+                    service=service,
+                    duration=duration,
+                    call_type=call_type,
+                    parent_otel_span=parent_otel_span,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            )
+
     def service_failure_hook(
         self, service: ServiceTypes, duration: float, error: Exception, call_type: str
     ):

@@ -62,6 +112,8 @@ class ServiceLogging(CustomLogger):
         """
         - For counting if the redis, postgres call is successful
         """
+        # if service == ServiceTypes.REDIS:
+        #     print(f"service: {service}, call_type: {call_type}")
         if self.mock_testing:
             self.mock_testing_async_success_hook += 1
```
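The new `service_success_hook` bridges sync call sites to the async hook by probing for an event loop. Here is the same pattern in isolation, a sketch with illustrative names (only the `asyncio` calls are real APIs; `async_hook` stands in for `async_service_success_hook`):

```python
import asyncio

async def async_hook(msg: str) -> None:
    # Stand-in for async_service_success_hook.
    print(f"logged: {msg}")

def sync_hook(msg: str) -> None:
    # Mirror of the diff's logic: schedule on a running loop,
    # drive an idle loop to completion, or create a fresh one.
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            loop.create_task(async_hook(msg))  # fire-and-forget
        else:
            loop.run_until_complete(async_hook(msg))
    except RuntimeError:
        # No event loop in this thread yet - create one.
        asyncio.run(async_hook(msg))

sync_hook("redis set_cache succeeded")
```

Note the `loop.create_task` branch makes the hook fire-and-forget inside a running loop: the sync caller does not block on the logging export.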
redis_cache.py

```diff
@@ -143,7 +143,17 @@ class RedisCache(BaseCache):
         )
         key = self.check_and_fix_namespace(key=key)
         try:
+            start_time = time.time()
             self.redis_client.set(name=key, value=str(value), ex=ttl)
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="set_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(

@@ -157,14 +167,44 @@ class RedisCache(BaseCache):
         start_time = time.time()
         set_ttl = self.get_ttl(ttl=ttl)
         try:
+            start_time = time.time()
             result: int = _redis_client.incr(name=key, amount=value)  # type: ignore
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="increment_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
+
             if set_ttl is not None:
                 # check if key already has ttl, if not -> set ttl
+                start_time = time.time()
                 current_ttl = _redis_client.ttl(key)
+                end_time = time.time()
+                _duration = end_time - start_time
+                self.service_logger_obj.service_success_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    call_type="increment_cache_ttl",
+                    start_time=start_time,
+                    end_time=end_time,
+                )
                 if current_ttl == -1:
                     # Key has no expiration
+                    start_time = time.time()
                     _redis_client.expire(key, set_ttl)  # type: ignore
+                    end_time = time.time()
+                    _duration = end_time - start_time
+                    self.service_logger_obj.service_success_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        call_type="increment_cache_expire",
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
             return result
         except Exception as e:
             ## LOGGING ##

@@ -565,7 +605,17 @@ class RedisCache(BaseCache):
         try:
             key = self.check_and_fix_namespace(key=key)
             print_verbose(f"Get Redis Cache: key: {key}")
+            start_time = time.time()
             cached_response = self.redis_client.get(key)
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="get_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
             print_verbose(
                 f"Got Redis Cache: key: {key}, cached_response {cached_response}"
             )

@@ -586,7 +636,17 @@ class RedisCache(BaseCache):
         for cache_key in key_list:
             cache_key = self.check_and_fix_namespace(key=cache_key)
             _keys.append(cache_key)
+        start_time = time.time()
         results: List = self.redis_client.mget(keys=_keys)  # type: ignore
+        end_time = time.time()
+        _duration = end_time - start_time
+        self.service_logger_obj.service_success_hook(
+            service=ServiceTypes.REDIS,
+            duration=_duration,
+            call_type="batch_get_cache",
+            start_time=start_time,
+            end_time=end_time,
+        )
 
         # Associate the results back with their keys.
         # 'results' is a list of values corresponding to the order of keys in 'key_list'.

@@ -725,6 +785,8 @@ class RedisCache(BaseCache):
                 service=ServiceTypes.REDIS,
                 duration=_duration,
                 call_type="sync_ping",
+                start_time=start_time,
+                end_time=end_time,
             )
             return response
         except Exception as e:
```
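Every instrumented call above repeats the same time / call / report shape. A generic sketch of that pattern, where `report` stands in for `service_logger_obj.service_success_hook` (the decorator itself is hypothetical, not part of LiteLLM):

```python
import time
from functools import wraps
from typing import Any, Callable

def timed_call(report: Callable[..., None], call_type: str):
    """Wrap a function with the time -> call -> report pattern from the diff."""
    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start_time = time.time()
            result = fn(*args, **kwargs)  # e.g. redis_client.get(key)
            end_time = time.time()
            report(
                duration=end_time - start_time,
                call_type=call_type,
                start_time=start_time,
                end_time=end_time,
            )
            return result
        return wrapper
    return decorator

# Usage sketch:
report = lambda **kwargs: print(kwargs)
cached_get = timed_call(report, call_type="get_cache")(lambda key: f"value:{key}")
cached_get("my-key")
```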
```diff
@@ -661,13 +661,7 @@ class APIResponseValidationError(openai.APIResponseValidationError):  # type: ignore
         return _message
 
 
-class OpenAIError(openai.OpenAIError):  # type: ignore
-    def __init__(self, original_exception=None):
-        super().__init__()
-        self.llm_provider = "openai"
-
-
-class JSONSchemaValidationError(APIError):
+class JSONSchemaValidationError(APIResponseValidationError):
     def __init__(
         self, model: str, llm_provider: str, raw_response: str, schema: str
     ) -> None:

@@ -678,9 +672,13 @@ class JSONSchemaValidationError(APIError):
             model, raw_response, schema
         )
         self.message = message
-        super().__init__(
-            model=model, message=message, llm_provider=llm_provider, status_code=500
-        )
+        super().__init__(model=model, message=message, llm_provider=llm_provider)
+
+
+class OpenAIError(openai.OpenAIError):  # type: ignore
+    def __init__(self, original_exception=None):
+        super().__init__()
+        self.llm_provider = "openai"
 
 
 class UnsupportedParamsError(BadRequestError):
```
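Re-parenting `JSONSchemaValidationError` (from `APIError` to `APIResponseValidationError`, with `OpenAIError` simply relocated below it) means handlers written for the parent class keep catching it. A sketch, assuming the `response_schema` / `enforce_validation` parameters referenced in the exceptions table above; the model name and schema are placeholders:

```python
import litellm
from litellm import APIResponseValidationError, JSONSchemaValidationError

schema = {"type": "object", "properties": {"name": {"type": "string"}}}

try:
    response = litellm.completion(
        model="gemini/gemini-1.5-pro",  # placeholder
        messages=[{"role": "user", "content": "Return JSON with a 'name' field"}],
        response_format={
            "type": "json_object",
            "response_schema": schema,
            "enforce_validation": True,
        },
    )
except JSONSchemaValidationError as e:
    # Specific: the model answered, but the answer failed schema validation.
    print(f"schema validation failed: {e.message}")
except APIResponseValidationError:
    # Generic: after this change, this handler also catches
    # JSONSchemaValidationError if the specific one above is removed.
    print("response validation failed")
```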
main.py

```diff
@@ -933,12 +933,7 @@ def completion(  # type: ignore # noqa: PLR0915
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,
-                },
-                model: {
-                    "input_cost_per_token": input_cost_per_token,
-                    "output_cost_per_token": output_cost_per_token,
-                    "litellm_provider": custom_llm_provider,
-                },
+                }
             }
         )
     elif (

@@ -951,12 +946,7 @@ def completion(  # type: ignore # noqa: PLR0915
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
-                },
-                model: {
-                    "input_cost_per_second": input_cost_per_second,
-                    "output_cost_per_second": output_cost_per_second,
-                    "litellm_provider": custom_llm_provider,
-                },
+                }
             }
         )
     ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###

@@ -3331,7 +3321,7 @@ def embedding(  # noqa: PLR0915
     if input_cost_per_token is not None and output_cost_per_token is not None:
         litellm.register_model(
             {
-                model: {
+                f"{custom_llm_provider}/{model}": {
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,

@@ -3342,7 +3332,7 @@ def embedding(  # noqa: PLR0915
         output_cost_per_second = output_cost_per_second or 0.0
         litellm.register_model(
             {
-                model: {
+                f"{custom_llm_provider}/{model}": {
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
```
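The fix replaces the bare `model` key with `f"{custom_llm_provider}/{model}"` when registering custom pricing, so a custom-priced duplicate of a model under another provider no longer overwrites the default entry. A sketch of the resulting behavior (costs are illustrative):

```python
import litellm

# Custom pricing lands only under the provider-qualified key ...
litellm.register_model(
    {
        "openai/claude-3-5-sonnet-20240620": {
            "input_cost_per_token": 0.000003,   # 3$/M, illustrative
            "output_cost_per_token": 0.000015,  # 15$/M, illustrative
            "litellm_provider": "openai",
        }
    }
)

# ... so the default anthropic entry for the same model name is untouched.
info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
assert info["litellm_provider"] == "anthropic"
```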
```diff
@@ -1,15 +1,19 @@
 model_list:
-  - model_name: gpt-4o
+  - model_name: claude-3-5-sonnet-20240620
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: claude-3-5-sonnet-20240620
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: claude-3-5-sonnet-aihubmix
+    litellm_params:
+      model: openai/claude-3-5-sonnet-20240620
+      input_cost_per_token: 0.000003 # 3$/M
+      output_cost_per_token: 0.000015 # 15$/M
+      api_base: "https://exampleopenaiendpoint-production.up.railway.app"
+      api_key: my-fake-key
 
 litellm_settings:
-  callbacks: ["prometheus", "otel"]
-
-general_settings:
-  user_api_key_cache_ttl: 3600
+  fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
+  callbacks: ["otel"]
 
 router_settings:
   routing_strategy: latency-based-routing

@@ -19,32 +23,6 @@ router_settings:
     # consider last five minutes of calls for latency calculation
     ttl: 300
 
-  # model_group_alias:
-  #   gpt-4o: gpt-4o-128k-2024-05-13
-  #   gpt-4o-mini: gpt-4o-mini-128k-2024-07-18
-
-  enable_tag_filtering: True
-
-  # retry call 3 times on each model_name (we don't use fallbacks, so this would be 3 times total)
-  num_retries: 3
-
-  # -- cooldown settings --
-  # see https://github.com/BerriAI/litellm/blob/main/litellm/router_utils/cooldown_handlers.py#L265
-
-  # cooldown model if it fails > n calls in a minute.
-  allowed_fails: 2
-
-  # (in seconds) how long to cooldown model if fails/min > allowed_fails
-  cooldown_time: 60
-
-  allowed_fails_policy:
-    InternalServerErrorAllowedFails: 1
-    RateLimitErrorAllowedFails: 2
-    TimeoutErrorAllowedFails: 3
-  # -- end cooldown settings --
-
-  # see https://docs.litellm.ai/docs/proxy/prod#3-use-redis-porthost-password-not-redis_url
   redis_host: os.environ/REDIS_HOST
   redis_port: os.environ/REDIS_PORT
   redis_password: os.environ/REDIS_PASSWORD
```
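With this config, proxy traffic for `claude-3-5-sonnet-20240620` falls back to `claude-3-5-sonnet-aihubmix` on failure. A hypothetical request against a locally running proxy; the URL, port, and key are placeholders:

```python
import requests

# Assumes the proxy was started with this config, e.g. `litellm --config config.yaml`.
resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",  # placeholder proxy address
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    json={
        "model": "claude-3-5-sonnet-20240620",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    },
)
print(resp.json())
```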
```diff
@@ -2003,6 +2003,7 @@ def register_model(model_cost: Union[str, dict]):  # noqa: PLR0915
             },
         }
     """
+
     loaded_model_cost = {}
     if isinstance(model_cost, dict):
         loaded_model_cost = model_cost
```
tests/documentation_tests/test_exception_types.py (new file, 81 lines)

```diff
@@ -0,0 +1,81 @@
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+import re
+
+# Backup the original sys.path
+original_sys_path = sys.path.copy()
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+public_exceptions = litellm.LITELLM_EXCEPTION_TYPES
+# Regular expression to extract the error name
+error_name_pattern = re.compile(r"\.exceptions\.([A-Za-z]+Error)")
+
+# Extract error names from each item
+error_names = {
+    error_name_pattern.search(str(item)).group(1)
+    for item in public_exceptions
+    if error_name_pattern.search(str(item))
+}
+
+
+# sys.path = original_sys_path
+
+
+# Parse the documentation to extract documented keys
+# repo_base = "./"
+repo_base = "../../"
+print(os.listdir(repo_base))
+docs_path = f"{repo_base}/docs/my-website/docs/exception_mapping.md"  # Path to the documentation
+documented_keys = set()
+try:
+    with open(docs_path, "r", encoding="utf-8") as docs_file:
+        content = docs_file.read()
+
+        exceptions_section = re.search(
+            r"## LiteLLM Exceptions(.*?)\n##", content, re.DOTALL
+        )
+        if exceptions_section:
+            # Step 2: Extract the table content
+            table_content = exceptions_section.group(1)
+
+            # Step 3: Create a pattern to capture the Error Types from each row
+            error_type_pattern = re.compile(r"\|\s*[^|]+\s*\|\s*([^\|]+?)\s*\|")
+
+            # Extract the error types
+            exceptions = error_type_pattern.findall(table_content)
+            print(f"exceptions: {exceptions}")
+
+            # Remove extra spaces if any
+            exceptions = [exception.strip() for exception in exceptions]
+
+            print(exceptions)
+            documented_keys.update(exceptions)
+
+except Exception as e:
+    raise Exception(
+        f"Error reading documentation: {e}, \n repo base - {os.listdir(repo_base)}"
+    )
+
+print(documented_keys)
+print(public_exceptions)
+print(error_names)
+
+# Compare and find undocumented keys
+undocumented_keys = error_names - documented_keys
+
+if undocumented_keys:
+    raise Exception(
+        f"\nKeys not documented in 'LiteLLM Exceptions': {undocumented_keys}"
+    )
+else:
+    print("\nAll keys are documented in 'LiteLLM Exceptions'. - {}".format(error_names))
```
```diff
@@ -1337,3 +1337,64 @@ async def test_anthropic_streaming_fallbacks(sync_mode):
     mock_client.assert_called_once()
     print(chunks)
     assert len(chunks) > 0
+
+
+def test_router_fallbacks_with_custom_model_costs():
+    """
+    Tests prod use-case where a custom model is registered with a different provider + custom costs.
+
+    Goal: make sure custom model doesn't override default model costs.
+    """
+    model_list = [
+        {
+            "model_name": "claude-3-5-sonnet-20240620",
+            "litellm_params": {
+                "model": "claude-3-5-sonnet-20240620",
+                "api_key": os.environ["ANTHROPIC_API_KEY"],
+                "input_cost_per_token": 30,
+                "output_cost_per_token": 60,
+            },
+        },
+        {
+            "model_name": "claude-3-5-sonnet-aihubmix",
+            "litellm_params": {
+                "model": "openai/claude-3-5-sonnet-20240620",
+                "input_cost_per_token": 0.000003,  # 3$/M
+                "output_cost_per_token": 0.000015,  # 15$/M
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app",
+                "api_key": "my-fake-key",
+            },
+        },
+    ]
+
+    router = Router(
+        model_list=model_list,
+        fallbacks=[{"claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"]}],
+    )
+
+    router.completion(
+        model="claude-3-5-sonnet-aihubmix",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    model_info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
+
+    print(f"key: {model_info['key']}")
+
+    assert model_info["litellm_provider"] == "anthropic"
+
+    response = router.completion(
+        model="claude-3-5-sonnet-20240620",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    print(f"response_cost: {response._hidden_params['response_cost']}")
+
+    assert response._hidden_params["response_cost"] > 10
+
+    model_info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
+
+    print(f"key: {model_info['key']}")
+
+    assert model_info["input_cost_per_token"] == 30
+    assert model_info["output_cost_per_token"] == 60
```