diff --git a/litellm/main.py b/litellm/main.py
index a76ef64a13..a0db6aff48 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -477,6 +477,9 @@ def mock_completion(
if time_delay is not None:
time.sleep(time_delay)
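+    # a dict mock_response is treated as a pre-built ModelResponse payload and
+    # returned directly, bypassing the streaming handling below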
+ if isinstance(mock_response, dict):
+ return ModelResponse(**mock_response)
+
model_response = ModelResponse(stream=stream)
if stream is True:
# don't try to access stream object,
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index e27fe5bab8..0000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-404: This page could not be found.LiteLLM Dashboard404 This page could not be found.
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html
deleted file mode 100644
index 49dfe314fc..0000000000
--- a/litellm/proxy/_experimental/out/model_hub.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index ca40252081..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 866ca0ab0a..d990a0e9b2 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,61 +1,14 @@
-environment_variables:
- LANGFUSE_PUBLIC_KEY: Q6K8MQN6L7sPYSJiFKM9eNrETOx6V/FxVPup4FqdKsZK1hyR4gyanlQ2KHLg5D5afng99uIt0JCEQ2jiKF9UxFvtnb4BbJ4qpeceH+iK8v/bdg==
- LANGFUSE_SECRET_KEY: 5xQ7KMa6YMLsm+H/Pf1VmlqWq1NON5IoCxABhkUBeSck7ftsj2CmpkL2ZwrxwrktgiTUBH+3gJYBX+XBk7lqOOUpvmiLjol/E5lCqq0M1CqLWA==
- SLACK_WEBHOOK_URL: RJjhS0Hhz0/s07sCIf1OTXmTGodpK9L2K9p953Z+fOX0l2SkPFT6mB9+yIrLufmlwEaku5NNEBKy//+AG01yOd+7wV1GhK65vfj3B/gTN8t5cuVnR4vFxKY5Rx4eSGLtzyAs+aIBTp4GoNXDIjroCqfCjPkItEZWCg==
-general_settings:
- alerting:
- - slack
- alerting_threshold: 300
- database_connection_pool_limit: 100
- database_connection_timeout: 60
- disable_master_key_return: true
- health_check_interval: 300
- proxy_batch_write_at: 60
- ui_access_mode: all
- # master_key: sk-1234
+model_list:
+ - model_name: my-fake-model
+ litellm_params:
+ model: gpt-3.5-turbo
+ api_key: my-fake-key
+ mock_response: hello-world
+ - model_name: gpt-4o
+ litellm_params:
+ model: azure/gpt-4o
+ api_base: https://litellm8397336933.openai.azure.com/
+ api_key: 610f806211ab47f2a694493000045858
+
litellm_settings:
- allowed_fails: 3
- failure_callback:
- - prometheus
- num_retries: 3
- service_callback:
- - prometheus_system
- success_callback:
- - langfuse
- - prometheus
- - langsmith
-model_list:
-- litellm_params:
- model: gpt-3.5-turbo
- model_name: gpt-3.5-turbo
-- litellm_params:
- api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
- api_key: my-fake-key
- model: openai/my-fake-model
- stream_timeout: 0.001
- model_name: fake-openai-endpoint
-- litellm_params:
- api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
- api_key: my-fake-key
- model: openai/my-fake-model-2
- stream_timeout: 0.001
- model_name: fake-openai-endpoint
-- litellm_params:
- api_base: os.environ/AZURE_API_BASE
- api_key: os.environ/AZURE_API_KEY
- api_version: 2023-07-01-preview
- model: azure/chatgpt-v-2
- stream_timeout: 0.001
- model_name: azure-gpt-3.5
-- litellm_params:
- api_key: os.environ/OPENAI_API_KEY
- model: text-embedding-ada-002
- model_name: text-embedding-ada-002
-- litellm_params:
- model: text-completion-openai/gpt-3.5-turbo-instruct
- model_name: gpt-instruct
-router_settings:
- enable_pre_call_checks: true
- redis_host: os.environ/REDIS_HOST
- redis_password: os.environ/REDIS_PASSWORD
- redis_port: os.environ/REDIS_PORT
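+  # if gpt-4o returns finish_reason == "content_filter", retry the request on my-fake-model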
+ content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index 69000d6048..a726e7f44f 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -562,6 +562,18 @@ class Router:
f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
)
+ ## CHECK CONTENT FILTER ERROR ##
+ if isinstance(response, ModelResponse):
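+            # a response finished with "content_filter" is surfaced as a
+            # ContentPolicyViolationError so the fallback logic can retry on a
+            # configured content-policy fallback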
+ _should_raise = self._should_raise_content_policy_error(
+ model=model, response=response, kwargs=kwargs
+ )
+ if _should_raise:
+ raise litellm.ContentPolicyViolationError(
+ message="Response output was blocked.",
+ model=model,
+ llm_provider="",
+ )
+
return response
except Exception as e:
verbose_router_logger.info(
@@ -721,6 +733,18 @@ class Router:
await self.async_routing_strategy_pre_call_checks(deployment=deployment)
response = await _response
+ ## CHECK CONTENT FILTER ERROR ##
+ if isinstance(response, ModelResponse):
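+            # mirrors the sync path: only raise when a content-policy fallback
+            # is available to handle the retry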
+ _should_raise = self._should_raise_content_policy_error(
+ model=model, response=response, kwargs=kwargs
+ )
+ if _should_raise:
+ raise litellm.ContentPolicyViolationError(
+ message="Response output was blocked.",
+ model=model,
+ llm_provider="",
+ )
+
self.success_calls[model_name] += 1
verbose_router_logger.info(
f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
@@ -2801,6 +2825,40 @@ class Router:
# Catch all - if any exceptions default to cooling down
return True
+ def _should_raise_content_policy_error(
+ self, model: str, response: ModelResponse, kwargs: dict
+ ) -> bool:
+ """
+ Determines if a content policy error should be raised.
+
+        The error is only raised if a content-policy fallback is configured for the model.
+
+        Otherwise, the original response is returned unchanged.
+ """
+ if response.choices[0].finish_reason != "content_filter":
+ return False
+
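+        # request-level fallbacks (passed via kwargs) take precedence over the
+        # router-level content_policy_fallbacks setting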
+ content_policy_fallbacks = kwargs.get(
+ "content_policy_fallbacks", self.content_policy_fallbacks
+ )
+ ### ONLY RAISE ERROR IF CP FALLBACK AVAILABLE ###
+ if content_policy_fallbacks is not None:
+ fallback_model_group = None
+ for item in content_policy_fallbacks: # [{"gpt-3.5-turbo": ["gpt-4"]}]
+ if list(item.keys())[0] == model:
+ fallback_model_group = item[model]
+ break
+
+ if fallback_model_group is not None:
+ return True
+
+ verbose_router_logger.info(
+ "Content Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={}".format(
+ model, content_policy_fallbacks
+ )
+ )
+ return False
+
def _set_cooldown_deployments(
self,
original_exception: Any,
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 545eb23db3..99d2a600c8 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1,8 +1,12 @@
#### What this tests ####
# This tests calling router with fallback models
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
import pytest
sys.path.insert(
@@ -762,9 +766,11 @@ def test_ausage_based_routing_fallbacks():
# The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
# It should work with "openai-gpt-4"
import os
+
+ from dotenv import load_dotenv
+
import litellm
from litellm import Router
- from dotenv import load_dotenv
load_dotenv()
@@ -1112,9 +1118,19 @@ async def test_client_side_fallbacks_list(sync_mode):
@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("content_filter_response_exception", [True, False])
@pytest.mark.asyncio
-async def test_router_content_policy_fallbacks(sync_mode):
+async def test_router_content_policy_fallbacks(
+ sync_mode, content_filter_response_exception
+):
os.environ["LITELLM_LOG"] = "DEBUG"
+
+ if content_filter_response_exception:
+ mock_response = Exception("content filtering policy")
+ else:
+ mock_response = litellm.ModelResponse(
+ choices=[litellm.Choices(finish_reason="content_filter")]
+ )
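+
+    # both mock shapes should trigger the content-policy fallback path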
router = Router(
model_list=[
{
@@ -1122,13 +1138,13 @@ async def test_router_content_policy_fallbacks(sync_mode):
"litellm_params": {
"model": "claude-2",
"api_key": "",
- "mock_response": Exception("content filtering policy"),
+ "mock_response": mock_response,
},
},
{
"model_name": "my-fallback-model",
"litellm_params": {
- "model": "claude-2",
+ "model": "openai/my-fake-model",
"api_key": "",
"mock_response": "This works!",
},
@@ -1165,3 +1181,5 @@ async def test_router_content_policy_fallbacks(sync_mode):
model="claude-2",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
+
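+    # "my-fake-model" is the underlying model of the fallback deployment, confirming
+    # the response was served by the fallback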
+ assert response.model == "my-fake-model"
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 206216ef0c..83c21e5d91 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
+from .utils import ModelResponse
class ModelConfig(BaseModel):
@@ -315,7 +316,7 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
input_cost_per_second: Optional[float]
output_cost_per_second: Optional[float]
## MOCK RESPONSES ##
- mock_response: Optional[str]
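+    # accepting a ModelResponse or Exception lets tests simulate provider-side
+    # behavior (e.g. content filtering) without hitting a real endpoint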
+ mock_response: Optional[Union[str, ModelResponse, Exception]]
class DeploymentTypedDict(TypedDict):