Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
fix(router.py): check if azure returns 'content_filter' response + fallback available -> fallback
Adds Azure content-filter responses to the exception mapping.
parent f814f24d9d
commit 2c7a80d08d
8 changed files with 100 additions and 70 deletions
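The behavior this commit targets, sketched end to end: when a deployment returns a 200 response whose finish_reason is "content_filter" and a content-policy fallback is configured for that model group, the router raises internally and retries on the fallback. This is a minimal sketch modelled on the test added below, not code from the diff; the deployment names and mock responses are illustrative.

import litellm
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "azure/gpt-4o",
                "api_key": "",
                # Simulate Azure returning a content-filtered (blocked) output.
                "mock_response": litellm.ModelResponse(
                    choices=[litellm.Choices(finish_reason="content_filter")]
                ),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    content_policy_fallbacks=[{"gpt-4o": ["my-fallback-model"]}],
)

response = router.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.model)  # expected to come from the fallback deployment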
@@ -477,6 +477,9 @@ def mock_completion(
         if time_delay is not None:
             time.sleep(time_delay)

+        if isinstance(mock_response, dict):
+            return ModelResponse(**mock_response)
+
         model_response = ModelResponse(stream=stream)
         if stream is True:
             # don't try to access stream object,
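The new dict branch above unpacks a plain dict straight into ModelResponse(**mock_response). A hedged illustration through the public mock_response kwarg of litellm.completion; the exact dict shape used here is an assumption, not taken from the diff.

import litellm

resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    # Handled by the isinstance(mock_response, dict) branch shown above.
    mock_response={
        "choices": [
            {
                "index": 0,
                "finish_reason": "content_filter",
                "message": {"role": "assistant", "content": ""},
            }
        ]
    },
)
print(resp.choices[0].finish_reason)  # "content_filter"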
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,61 +1,14 @@
-environment_variables:
-  LANGFUSE_PUBLIC_KEY: Q6K8MQN6L7sPYSJiFKM9eNrETOx6V/FxVPup4FqdKsZK1hyR4gyanlQ2KHLg5D5afng99uIt0JCEQ2jiKF9UxFvtnb4BbJ4qpeceH+iK8v/bdg==
-  LANGFUSE_SECRET_KEY: 5xQ7KMa6YMLsm+H/Pf1VmlqWq1NON5IoCxABhkUBeSck7ftsj2CmpkL2ZwrxwrktgiTUBH+3gJYBX+XBk7lqOOUpvmiLjol/E5lCqq0M1CqLWA==
-  SLACK_WEBHOOK_URL: RJjhS0Hhz0/s07sCIf1OTXmTGodpK9L2K9p953Z+fOX0l2SkPFT6mB9+yIrLufmlwEaku5NNEBKy//+AG01yOd+7wV1GhK65vfj3B/gTN8t5cuVnR4vFxKY5Rx4eSGLtzyAs+aIBTp4GoNXDIjroCqfCjPkItEZWCg==
-general_settings:
-  alerting:
-  - slack
-  alerting_threshold: 300
-  database_connection_pool_limit: 100
-  database_connection_timeout: 60
-  disable_master_key_return: true
-  health_check_interval: 300
-  proxy_batch_write_at: 60
-  ui_access_mode: all
-  # master_key: sk-1234
-litellm_settings:
-  allowed_fails: 3
-  failure_callback:
-  - prometheus
-  num_retries: 3
-  service_callback:
-  - prometheus_system
-  success_callback:
-  - langfuse
-  - prometheus
-  - langsmith
-model_list:
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_name: gpt-3.5-turbo
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model
-    stream_timeout: 0.001
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model-2
-    stream_timeout: 0.001
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/chatgpt-v-2
-    stream_timeout: 0.001
-  model_name: azure-gpt-3.5
-- litellm_params:
-    api_key: os.environ/OPENAI_API_KEY
-    model: text-embedding-ada-002
-  model_name: text-embedding-ada-002
-- litellm_params:
-    model: text-completion-openai/gpt-3.5-turbo-instruct
-  model_name: gpt-instruct
-router_settings:
-  enable_pre_call_checks: true
-  redis_host: os.environ/REDIS_HOST
-  redis_password: os.environ/REDIS_PASSWORD
-  redis_port: os.environ/REDIS_PORT
+model_list:
+  - model_name: my-fake-model
+    litellm_params:
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+      mock_response: hello-world
+  - model_name: gpt-4o
+    litellm_params:
+      model: azure/gpt-4o
+      api_base: https://litellm8397336933.openai.azure.com/
+      api_key: 610f806211ab47f2a694493000045858
+litellm_settings:
+  content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
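With the trimmed config above, the proxy serves "gpt-4o" with a content-policy fallback onto "my-fake-model". A hedged sketch of calling it through the proxy's OpenAI-compatible endpoint; the base URL, port, and client key are assumptions about a default local deployment and are not part of the diff.

import openai

client = openai.OpenAI(
    api_key="sk-1234",               # assumed proxy virtual/master key
    base_url="http://0.0.0.0:4000",  # assumed default local proxy address
)

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
# If azure/gpt-4o answers with finish_reason == "content_filter", the router
# should retry on "my-fake-model" per content_policy_fallbacks.
print(response.model)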
@@ -562,6 +562,18 @@ class Router:
                 f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
             )

+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -721,6 +733,18 @@ class Router:
             await self.async_routing_strategy_pre_call_checks(deployment=deployment)
             response = await _response

+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             self.success_calls[model_name] += 1
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
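Both the sync and async paths now convert a blocked-but-successful Azure response into litellm.ContentPolicyViolationError, which the router's fallback logic can then act on. A small sketch of catching that exception type directly; the constructor arguments mirror the diff, and the surrounding try/except is only illustrative.

import litellm

try:
    raise litellm.ContentPolicyViolationError(
        message="Response output was blocked.",
        model="gpt-4o",
        llm_provider="",
    )
except litellm.ContentPolicyViolationError as err:
    # A caller without a matching content_policy_fallbacks entry would see this.
    print(f"blocked by content policy: {err}")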
@@ -2801,6 +2825,40 @@ class Router:
             # Catch all - if any exceptions default to cooling down
             return True

+    def _should_raise_content_policy_error(
+        self, model: str, response: ModelResponse, kwargs: dict
+    ) -> bool:
+        """
+        Determines if a content policy error should be raised.
+
+        Only raised if a fallback is available.
+
+        Else, original response is returned.
+        """
+        if response.choices[0].finish_reason != "content_filter":
+            return False
+
+        content_policy_fallbacks = kwargs.get(
+            "content_policy_fallbacks", self.content_policy_fallbacks
+        )
+        ### ONLY RAISE ERROR IF CP FALLBACK AVAILABLE ###
+        if content_policy_fallbacks is not None:
+            fallback_model_group = None
+            for item in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                if list(item.keys())[0] == model:
+                    fallback_model_group = item[model]
+                    break
+
+            if fallback_model_group is not None:
+                return True
+
+        verbose_router_logger.info(
+            "Content Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={}".format(
+                model, content_policy_fallbacks
+            )
+        )
+        return False
+
     def _set_cooldown_deployments(
         self,
         original_exception: Any,
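_should_raise_content_policy_error only raises when the requested model group has an entry in content_policy_fallbacks; otherwise the filtered response is passed through unchanged. A self-contained sketch of that lookup, using the same list-of-single-key-dicts shape shown in the inline comment; the group names are illustrative.

from typing import Dict, List, Optional

content_policy_fallbacks: List[Dict[str, List[str]]] = [
    {"gpt-3.5-turbo": ["gpt-4"]},
    {"gpt-4o": ["my-fake-model"]},
]

def fallback_group_for(model: str) -> Optional[List[str]]:
    # Mirrors the loop above: the first dict whose single key equals the
    # requested model group decides whether a fallback exists.
    for item in content_policy_fallbacks:
        if list(item.keys())[0] == model:
            return item[model]
    return None

assert fallback_group_for("gpt-4o") == ["my-fake-model"]
assert fallback_group_for("claude-2") is None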
@@ -1,8 +1,12 @@
 #### What this tests ####
 # This tests calling router with fallback models

-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest

 sys.path.insert(
@@ -762,9 +766,11 @@ def test_ausage_based_routing_fallbacks():
     # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
     # It should work with "openai-gpt-4"
     import os
+
+    from dotenv import load_dotenv
+
     import litellm
     from litellm import Router
-    from dotenv import load_dotenv

     load_dotenv()

@@ -1112,9 +1118,19 @@ async def test_client_side_fallbacks_list(sync_mode):


 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("content_filter_response_exception", [True, False])
 @pytest.mark.asyncio
-async def test_router_content_policy_fallbacks(sync_mode):
+async def test_router_content_policy_fallbacks(
+    sync_mode, content_filter_response_exception
+):
     os.environ["LITELLM_LOG"] = "DEBUG"
+
+    if content_filter_response_exception:
+        mock_response = Exception("content filtering policy")
+    else:
+        mock_response = litellm.ModelResponse(
+            choices=[litellm.Choices(finish_reason="content_filter")]
+        )
     router = Router(
         model_list=[
             {
@@ -1122,13 +1138,13 @@ async def test_router_content_policy_fallbacks(sync_mode):
                 "litellm_params": {
                     "model": "claude-2",
                     "api_key": "",
-                    "mock_response": Exception("content filtering policy"),
+                    "mock_response": mock_response,
                 },
             },
             {
                 "model_name": "my-fallback-model",
                 "litellm_params": {
-                    "model": "claude-2",
+                    "model": "openai/my-fake-model",
                     "api_key": "",
                     "mock_response": "This works!",
                 },
@@ -1165,3 +1181,5 @@ async def test_router_content_policy_fallbacks(sync_mode):
             model="claude-2",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
         )
+
+    assert response.model == "my-fake-model"
@@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field

 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
+from .utils import ModelResponse


 class ModelConfig(BaseModel):
@@ -315,7 +316,7 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
     input_cost_per_second: Optional[float]
     output_cost_per_second: Optional[float]
     ## MOCK RESPONSES ##
-    mock_response: Optional[str]
+    mock_response: Optional[Union[str, ModelResponse, Exception]]


 class DeploymentTypedDict(TypedDict):
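Widening mock_response to Union[str, ModelResponse, Exception] is what lets the test above register a deployment whose mock reply is either a fully-formed response with finish_reason="content_filter" or a raised exception. A hedged illustration of litellm_params dicts using each accepted form; the values echo the test and are not a prescribed configuration.

import litellm

as_text = {
    "model": "openai/my-fake-model",
    "api_key": "",
    "mock_response": "This works!",  # plain string, as before
}
as_response = {
    "model": "openai/my-fake-model",
    "api_key": "",
    "mock_response": litellm.ModelResponse(  # now also accepted
        choices=[litellm.Choices(finish_reason="content_filter")]
    ),
}
as_error = {
    "model": "openai/my-fake-model",
    "api_key": "",
    "mock_response": Exception("content filtering policy"),  # now also accepted
}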