forked from phoenix/litellm-mirror

fix(slack_alerting.py): don't fire spam alerts when backend api call fails

parent b063ef7a47
commit 13e1577753

4 changed files with 69 additions and 21 deletions
@@ -76,16 +76,14 @@ class SlackAlerting(CustomLogger):
         internal_usage_cache: Optional[DualCache] = None,
         alerting_threshold: float = 300,  # threshold for slow / hanging llm responses (in seconds)
         alerting: Optional[List] = [],
-        alert_types: Optional[
-            List[
-                Literal[
-                    "llm_exceptions",
-                    "llm_too_slow",
-                    "llm_requests_hanging",
-                    "budget_alerts",
-                    "db_exceptions",
-                    "daily_reports",
-                ]
+        alert_types: List[
+            Literal[
+                "llm_exceptions",
+                "llm_too_slow",
+                "llm_requests_hanging",
+                "budget_alerts",
+                "db_exceptions",
+                "daily_reports",
+            ]
+        ] = [
+            "llm_exceptions",

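The effect of dropping Optional here is that alert_types is always a concrete list, so membership checks like the one removed further down never have to handle None. Below is a minimal sketch of constructing the alerter with an explicit subset of alert types; the constructor arguments are taken from the signature above, while both import paths are assumptions about where these classes live.

from litellm.caching import DualCache  # assumed location of DualCache
from litellm.integrations.slack_alerting import SlackAlerting  # assumed module path

slack_alerting = SlackAlerting(
    internal_usage_cache=DualCache(),
    alerting_threshold=300,  # seconds before a request counts as slow / hanging
    alerting=["slack"],
    # alert_types is now a plain List (no longer Optional), so passing an
    # explicit subset leaves every alert type not named here switched off:
    alert_types=["llm_too_slow", "budget_alerts", "daily_reports"],
)
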
@@ -812,14 +810,6 @@ Model Info:
                     updated_at=litellm.utils.get_utc_datetime(),
                 )
             )
-        if "llm_exceptions" in self.alert_types:
-            original_exception = kwargs.get("exception", None)
-
-            await self.send_alert(
-                message="LLM API Failure - " + str(original_exception),
-                level="High",
-                alert_type="llm_exceptions",
-            )
 
     async def _run_scheduler_helper(self, llm_router) -> bool:
         """

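This is the core of the fix: the failure handler no longer pushes a Slack message for every failed backend API call, which is what produced the spam alerts named in the commit title. If an exception alert is still wanted at some call site, it can be sent explicitly with the same send_alert signature shown in the deleted lines. The helper below is a hypothetical illustration, with slack_alerting standing in for an already configured SlackAlerting instance.

async def report_llm_failure(slack_alerting, original_exception: Exception) -> None:
    # Same gate the removed code used: only fire when the operator opted in.
    if "llm_exceptions" in slack_alerting.alert_types:
        await slack_alerting.send_alert(
            message="LLM API Failure - " + str(original_exception),
            level="High",
            alert_type="llm_exceptions",
        )
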
@@ -8,6 +8,16 @@ model_list:
       base_model: text-embedding-ada-002
       mode: embedding
     model_name: text-embedding-ada-002
+  - model_name: gpt-3.5-turbo-012
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_base: http://0.0.0.0:8080
+      api_key: ""
+  - model_name: gpt-3.5-turbo-0125-preview
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
 
 router_settings:
   redis_host: redis

@@ -17,6 +27,7 @@ router_settings:
 
 litellm_settings:
   set_verbose: True
+  fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]

@@ -26,3 +37,4 @@ general_settings:
   disable_reset_budget: True
   proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
   routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
+  alerting: ["slack"]

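The new gpt-3.5-turbo-012 entry points its api_base at http://0.0.0.0:8080, a locally running stub backend rather than a real provider. Nothing in this commit defines that stub; below is a minimal sketch of one, assuming all it has to do is answer every request with HTTP 503 so that calls to this deployment fail and the configured fallback to gpt-3.5-turbo-0125-preview is exercised.

# Hypothetical helper, not part of the commit: a local stub on 0.0.0.0:8080
# that answers every POST with 503 so the routed request fails over.
from http.server import BaseHTTPRequestHandler, HTTPServer

class AlwaysUnavailable(BaseHTTPRequestHandler):
    def do_POST(self):
        body = b'{"error": {"message": "service unavailable", "type": "server_error"}}'
        self.send_response(503)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

if __name__ == "__main__":
    HTTPServer(("0.0.0.0", 8080), AlwaysUnavailable).serve_forever()
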
@@ -1413,7 +1413,7 @@ class Router:
                 verbose_router_logger.debug(f"Trying to fallback b/w models")
                 if (
                     hasattr(e, "status_code")
-                    and e.status_code == 400
+                    and e.status_code == 400  # type: ignore
                     and not isinstance(e, litellm.ContextWindowExceededError)
                 ):  # don't retry a malformed request
                     raise e

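The only change here is the "# type: ignore" on the status-code comparison; the guard itself is unchanged. Restated as a standalone predicate (a hypothetical helper, purely for illustration): a 400 means the request itself is malformed, so falling back to another deployment would just repeat the same failure, except for context-window errors, where a model with a larger context might still succeed.

import litellm

def should_reraise_without_fallback(e: Exception) -> bool:
    # Mirrors the condition in the hunk above.
    return (
        getattr(e, "status_code", None) == 400
        and not isinstance(e, litellm.ContextWindowExceededError)
    )
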
@@ -3648,7 +3648,7 @@ class Router:
                     )
                     asyncio.create_task(
                         proxy_logging_obj.slack_alerting_instance.send_alert(
-                            message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}",
+                            message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}. Change 'cooldown_time' + 'allowed_failes' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
                             alert_type="cooldown_deployment",
                             level="Low",
                         )

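The cooldown alert now tells the operator which settings to tune instead of only reporting the exception. Structurally it stays a fire-and-forget notification: scheduling send_alert with asyncio.create_task keeps Slack I/O off the request path, so a slow or failed alert can never fail the request. A condensed sketch of that pattern, with slack_alerting standing in for the configured alerting instance (hypothetical names; the keyword arguments mirror the hunk above):

import asyncio

async def cool_down_deployment(slack_alerting, api_base: str, cooldown_time: float, exception_status) -> None:
    # ... mark the deployment as cooling down here ...
    # Fire-and-forget: uses the event loop this coroutine is already running in.
    asyncio.create_task(
        slack_alerting.send_alert(
            message=(
                f"Router: Cooling down deployment: {api_base}, for {cooldown_time} seconds. "
                f"Got exception: {exception_status}"
            ),
            alert_type="cooldown_deployment",
            level="Low",
        )
    )
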
@@ -961,3 +961,49 @@ def test_custom_cooldown_times():
 
     except Exception as e:
         print(e)
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_service_unavailable_fallbacks(sync_mode):
+    """
+    Initial model - openai
+    Fallback - azure
+
+    Error - 503, service unavailable
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo-012",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "anything",
+                    "api_base": "http://0.0.0.0:8080",
+                },
+            },
+            {
+                "model_name": "gpt-3.5-turbo-0125-preview",
+                "litellm_params": {
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+            },
+        ],
+        fallbacks=[{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}],
+    )
+
+    if sync_mode:
+        response = router.completion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    else:
+        response = await router.acompletion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+    assert response.model == "gpt-35-turbo"

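To run this test outside CI, something has to be listening on http://0.0.0.0:8080 and failing. Below is a hypothetical bit of glue, not part of the commit, that runs a stub handler such as the 503 stub sketched earlier in a background thread before the router call is made.

import threading
from http.server import HTTPServer

def start_stub_backend(handler_cls, host: str = "0.0.0.0", port: int = 8080) -> HTTPServer:
    # handler_cls is a BaseHTTPRequestHandler subclass, e.g. the AlwaysUnavailable stub above.
    server = HTTPServer((host, port), handler_cls)
    threading.Thread(target=server.serve_forever, daemon=True).start()
    return server

The closing assertion checks response.model == "gpt-35-turbo" because Azure reports the deployment's underlying model name without dots, which is how the test confirms that the azure/chatgpt-v-2 fallback, and not the unavailable primary deployment, actually served the request.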