diff --git a/litellm/main.py b/litellm/main.py
index a76ef64a1..a0db6aff4 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -477,6 +477,9 @@ def mock_completion(
         if time_delay is not None:
             time.sleep(time_delay)

+        if isinstance(mock_response, dict):
+            return ModelResponse(**mock_response)
+
         model_response = ModelResponse(stream=stream)
         if stream is True:
             # don't try to access stream object,
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index e27fe5bab..000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-404: This page could not be found.LiteLLM Dashboard
\ No newline at end of file
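The mock_completion() hunk above adds support for passing mock_response as a plain dict, which is rehydrated into a ModelResponse. A minimal sketch of how that might be exercised through litellm.completion(); the payload shape below is illustrative and assumes ModelResponse(**payload) accepts nested dicts for choices and message:

    import litellm

    # Illustrative payload only (not taken from this diff); any dict that
    # ModelResponse(**payload) accepts should work here.
    payload = {
        "choices": [
            {
                "finish_reason": "content_filter",
                "message": {"role": "assistant", "content": None},
            }
        ]
    }

    # completion() forwards mock_response to mock_completion(), which now
    # short-circuits with ModelResponse(**payload) when handed a dict.
    resp = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=payload,
    )
    print(resp.choices[0].finish_reason)  # expected: "content_filter"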


diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html
deleted file mode 100644
index 49dfe314f..000000000
--- a/litellm/proxy/_experimental/out/model_hub.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index ca4025208..000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 866ca0ab0..d990a0e9b 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,61 +1,14 @@
-environment_variables:
-  LANGFUSE_PUBLIC_KEY: Q6K8MQN6L7sPYSJiFKM9eNrETOx6V/FxVPup4FqdKsZK1hyR4gyanlQ2KHLg5D5afng99uIt0JCEQ2jiKF9UxFvtnb4BbJ4qpeceH+iK8v/bdg==
-  LANGFUSE_SECRET_KEY: 5xQ7KMa6YMLsm+H/Pf1VmlqWq1NON5IoCxABhkUBeSck7ftsj2CmpkL2ZwrxwrktgiTUBH+3gJYBX+XBk7lqOOUpvmiLjol/E5lCqq0M1CqLWA==
-  SLACK_WEBHOOK_URL: RJjhS0Hhz0/s07sCIf1OTXmTGodpK9L2K9p953Z+fOX0l2SkPFT6mB9+yIrLufmlwEaku5NNEBKy//+AG01yOd+7wV1GhK65vfj3B/gTN8t5cuVnR4vFxKY5Rx4eSGLtzyAs+aIBTp4GoNXDIjroCqfCjPkItEZWCg==
-general_settings:
-  alerting:
-  - slack
-  alerting_threshold: 300
-  database_connection_pool_limit: 100
-  database_connection_timeout: 60
-  disable_master_key_return: true
-  health_check_interval: 300
-  proxy_batch_write_at: 60
-  ui_access_mode: all
-  # master_key: sk-1234
+model_list:
+  - model_name: my-fake-model
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_key: my-fake-key
+      mock_response: hello-world
+  - model_name: gpt-4o
+    litellm_params:
+      model: azure/gpt-4o
+      api_base: https://litellm8397336933.openai.azure.com/
+      api_key: 610f806211ab47f2a694493000045858
+
 litellm_settings:
-  allowed_fails: 3
-  failure_callback:
-  - prometheus
-  num_retries: 3
-  service_callback:
-  - prometheus_system
-  success_callback:
-  - langfuse
-  - prometheus
-  - langsmith
-model_list:
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_name: gpt-3.5-turbo
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model
-    stream_timeout: 0.001
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model-2
-    stream_timeout: 0.001
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/chatgpt-v-2
-    stream_timeout: 0.001
-  model_name: azure-gpt-3.5
-- litellm_params:
-    api_key: os.environ/OPENAI_API_KEY
-    model: text-embedding-ada-002
-  model_name: text-embedding-ada-002
-- litellm_params:
-    model: text-completion-openai/gpt-3.5-turbo-instruct
-  model_name: gpt-instruct
-router_settings:
-  enable_pre_call_checks: true
-  redis_host: os.environ/REDIS_HOST
-  redis_password: os.environ/REDIS_PASSWORD
-  redis_port: os.environ/REDIS_PORT
+  content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index 69000d604..a726e7f44 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -562,6 +562,18 @@ class Router:
                 f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
             )

+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -721,6 +733,18 @@ class Router:
             await self.async_routing_strategy_pre_call_checks(deployment=deployment)
             response = await _response

+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             self.success_calls[model_name] += 1
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
@@ -2801,6 +2825,40 @@ class Router:
             # Catch all - if any exceptions default to cooling down
             return True

+    def _should_raise_content_policy_error(
+        self, model: str, response: ModelResponse, kwargs: dict
+    ) -> bool:
+        """
+        Determines if a content policy error should be raised.
+
+        Only raised if a fallback is available.
+
+        Else, original response is returned.
+        """
+        if response.choices[0].finish_reason != "content_filter":
+            return False
+
+        content_policy_fallbacks = kwargs.get(
+            "content_policy_fallbacks", self.content_policy_fallbacks
+        )
+        ### ONLY RAISE ERROR IF CP FALLBACK AVAILABLE ###
+        if content_policy_fallbacks is not None:
+            fallback_model_group = None
+            for item in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                if list(item.keys())[0] == model:
+                    fallback_model_group = item[model]
+                    break
+
+            if fallback_model_group is not None:
+                return True
+
+        verbose_router_logger.info(
+            "Content Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={}".format(
+                model, content_policy_fallbacks
+            )
+        )
+        return False
+
     def _set_cooldown_deployments(
         self,
         original_exception: Any,
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 545eb23db..99d2a600c 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1,8 +1,12 @@
 #### What this tests ####
 # This tests calling router with fallback models

-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest

 sys.path.insert(
@@ -762,9 +766,11 @@ def test_ausage_based_routing_fallbacks():
     # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
     # It should work with "openai-gpt-4"
     import os
+
+    from dotenv import load_dotenv
+
     import litellm
     from litellm import Router
-    from dotenv import load_dotenv

     load_dotenv()
@@ -1112,9 +1118,19 @@ async def test_client_side_fallbacks_list(sync_mode):


 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("content_filter_response_exception", [True, False])
 @pytest.mark.asyncio
-async def test_router_content_policy_fallbacks(sync_mode):
+async def test_router_content_policy_fallbacks(
+    sync_mode, content_filter_response_exception
+):
     os.environ["LITELLM_LOG"] = "DEBUG"
+
+    if content_filter_response_exception:
+        mock_response = Exception("content filtering policy")
+    else:
+        mock_response = litellm.ModelResponse(
+            choices=[litellm.Choices(finish_reason="content_filter")]
+        )
     router = Router(
         model_list=[
             {
@@ -1122,13 +1138,13 @@ async def test_router_content_policy_fallbacks(sync_mode):
                 "litellm_params": {
                     "model": "claude-2",
                     "api_key": "",
-                    "mock_response": Exception("content filtering policy"),
+                    "mock_response": mock_response,
                 },
             },
             {
                 "model_name": "my-fallback-model",
                 "litellm_params": {
-                    "model": "claude-2",
+                    "model": "openai/my-fake-model",
                     "api_key": "",
                     "mock_response": "This works!",
                 },
@@ -1165,3 +1181,5 @@ async def test_router_content_policy_fallbacks(sync_mode):
             model="claude-2",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
         )
+
+    assert response.model == "my-fake-model"
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 206216ef0..83c21e5d9 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field

 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
+from .utils import ModelResponse


 class ModelConfig(BaseModel):
@@ -315,7 +316,7 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
     input_cost_per_second: Optional[float]
     output_cost_per_second: Optional[float]
     ## MOCK RESPONSES ##
-    mock_response: Optional[str]
+    mock_response: Optional[Union[str, ModelResponse, Exception]]


 class DeploymentTypedDict(TypedDict):
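Taken together, the router.py hunks, the widened mock_response type, and the updated test describe one flow: a deployment returns a ModelResponse whose finish_reason is "content_filter", and the router raises ContentPolicyViolationError only when a content_policy_fallbacks entry covers that model group, so the fallback deployment answers instead of the filtered response being passed through. A rough sync-mode sketch mirroring the updated test (model names are the test's fakes; this assumes Router accepts content_policy_fallbacks as a constructor argument, as the config change above implies):

    import litellm
    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "claude-2",
                "litellm_params": {
                    "model": "claude-2",
                    "api_key": "",
                    # Deployment "responds" with a content-filtered ModelResponse.
                    "mock_response": litellm.ModelResponse(
                        choices=[litellm.Choices(finish_reason="content_filter")]
                    ),
                },
            },
            {
                "model_name": "my-fallback-model",
                "litellm_params": {
                    "model": "openai/my-fake-model",
                    "api_key": "",
                    "mock_response": "This works!",
                },
            },
        ],
        content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}],
    )

    response = router.completion(
        model="claude-2",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
    # With a fallback configured, _should_raise_content_policy_error() converts the
    # filtered response into ContentPolicyViolationError and the router retries on
    # "my-fallback-model"; without one, the original response is returned as-is.
    print(response.choices[0].message.content)  # "This works!"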