Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
feat(router.py): support mock testing content policy + context window fallbacks
parent e215dc86c1
commit c46b229202

2 changed files with 76 additions and 20 deletions
Config YAML (first changed file):

@@ -1,24 +1,54 @@

Before:

model_list:
  - model_name: my-fake-model
    litellm_params:
      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
      api_key: my-fake-key
      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
      mock_response: "Hello world 1"
    model_info:
      max_input_tokens: 0 # trigger context window fallback
  - model_name: my-fake-model
    litellm_params:
      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
      api_key: my-fake-key
      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
      mock_response: "Hello world 2"
    model_info:
      max_input_tokens: 0

router_settings:
  enable_pre_call_checks: True

litellm_settings:
  failure_callback: ["langfuse"]

After:

# model_list:
#   - model_name: my-fake-model
#     litellm_params:
#       model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
#       api_key: my-fake-key
#       aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
#       mock_response: "Hello world 1"
#     model_info:
#       max_input_tokens: 0 # trigger context window fallback
#   - model_name: my-fake-model
#     litellm_params:
#       model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
#       api_key: my-fake-key
#       aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
#       mock_response: "Hello world 2"
#     model_info:
#       max_input_tokens: 0

# router_settings:
#   enable_pre_call_checks: True

# litellm_settings:
#   failure_callback: ["langfuse"]

model_list:
  - model_name: summarize
    litellm_params:
      model: openai/gpt-4o
      rpm: 10000
      tpm: 12000000
      api_key: os.environ/OPENAI_API_KEY
      mock_response: Hello world 1

  - model_name: summarize-l
    litellm_params:
      model: claude-3-5-sonnet-20240620
      rpm: 4000
      tpm: 400000
      api_key: os.environ/ANTHROPIC_API_KEY
      mock_response: Hello world 2

litellm_settings:
  num_retries: 3
  request_timeout: 120
  allowed_fails: 3
  # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]

router_settings:
  routing_strategy: simple-shuffle
  enable_pre_call_checks: true
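For reference, the new YAML maps onto litellm's Python Router roughly as sketched below. This is a minimal sketch, not part of the commit: it assumes the Router constructor accepts model_list, context_window_fallbacks, num_retries, allowed_fails, enable_pre_call_checks, and routing_strategy as keyword arguments, and it relies on mock_response so no real OpenAI/Anthropic credentials are needed.

from litellm import Router

# Rough Python equivalent of the new config above (sketch, not from the commit).
# mock_response makes each deployment return a canned answer, so the fallback
# chain can be exercised without real provider credentials.
model_list = [
    {
        "model_name": "summarize",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "rpm": 10000,
            "tpm": 12000000,
            "api_key": "my-fake-key",
            "mock_response": "Hello world 1",
        },
    },
    {
        "model_name": "summarize-l",
        "litellm_params": {
            "model": "claude-3-5-sonnet-20240620",
            "rpm": 4000,
            "tpm": 400000,
            "api_key": "my-fake-key",
            "mock_response": "Hello world 2",
        },
    },
]

router = Router(
    model_list=model_list,
    num_retries=3,
    allowed_fails=3,
    enable_pre_call_checks=True,
    routing_strategy="simple-shuffle",
    # Same shape as the YAML: when "summarize" hits a context-window error,
    # retry against "summarize-l" (and "summarize-xl", if such a group is defined).
    context_window_fallbacks=[
        {"summarize": ["summarize-l", "summarize-xl"]},
        {"summarize-l": ["summarize-xl"]},
    ],
)

response = router.completion(
    model="summarize",
    messages=[{"role": "user", "content": "Summarize this for me."}],
)
print(response.choices[0].message.content)  # "Hello world 1" (the mock response)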
router.py (second changed file):

@@ -2117,6 +2117,12 @@ class Router:
         If it fails after num_retries, fall back to another model group
         """
         mock_testing_fallbacks = kwargs.pop("mock_testing_fallbacks", None)
+        mock_testing_context_fallbacks = kwargs.pop(
+            "mock_testing_context_fallbacks", None
+        )
+        mock_testing_content_policy_fallbacks = kwargs.pop(
+            "mock_testing_content_policy_fallbacks", None
+        )
         model_group = kwargs.get("model")
         fallbacks = kwargs.get("fallbacks", self.fallbacks)
         context_window_fallbacks = kwargs.get(

@@ -2130,6 +2136,26 @@ class Router:
             raise Exception(
                 f"This is a mock exception for model={model_group}, to trigger a fallback. Fallbacks={fallbacks}"
             )
+        elif (
+            mock_testing_context_fallbacks is not None
+            and mock_testing_context_fallbacks is True
+        ):
+            raise litellm.ContextWindowExceededError(
+                model=model_group,
+                llm_provider="",
+                message=f"This is a mock exception for model={model_group}, to trigger a fallback. \
+                    Context_Window_Fallbacks={context_window_fallbacks}",
+            )
+        elif (
+            mock_testing_content_policy_fallbacks is not None
+            and mock_testing_content_policy_fallbacks is True
+        ):
+            raise litellm.ContentPolicyViolationError(
+                model=model_group,
+                llm_provider="",
+                message=f"This is a mock exception for model={model_group}, to trigger a fallback. \
+                    Context_Policy_Fallbacks={content_policy_fallbacks}",
+            )

         response = await self.async_function_with_retries(*args, **kwargs)
         verbose_router_logger.debug(f"Async Response: {response}")
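Below is a minimal sketch (not part of the commit) of how the new flags might be exercised. It assumes the two mock-testing kwargs are passed straight through Router.acompletion, mirroring the existing mock_testing_fallbacks flag, and that the Router accepts a content_policy_fallbacks argument alongside context_window_fallbacks.

import asyncio
from litellm import Router

# Two mock deployments: if "summarize" fails, fall back to "summarize-l".
model_list = [
    {
        "model_name": "summarize",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "api_key": "my-fake-key",
            "mock_response": "Hello world 1",
        },
    },
    {
        "model_name": "summarize-l",
        "litellm_params": {
            "model": "claude-3-5-sonnet-20240620",
            "api_key": "my-fake-key",
            "mock_response": "Hello world 2",
        },
    },
]

router = Router(
    model_list=model_list,
    context_window_fallbacks=[{"summarize": ["summarize-l"]}],
    content_policy_fallbacks=[{"summarize": ["summarize-l"]}],  # assumed Router kwarg
)


async def main():
    # New in this commit: force a mock ContextWindowExceededError for "summarize";
    # the router should recover via the context-window fallback and return
    # summarize-l's mock_response ("Hello world 2").
    resp = await router.acompletion(
        model="summarize",
        messages=[{"role": "user", "content": "hi"}],
        mock_testing_context_fallbacks=True,
    )
    print(resp.choices[0].message.content)

    # Likewise for content policy: force a mock ContentPolicyViolationError
    # and recover via the content-policy fallback group.
    resp = await router.acompletion(
        model="summarize",
        messages=[{"role": "user", "content": "hi"}],
        mock_testing_content_policy_fallbacks=True,
    )
    print(resp.choices[0].message.content)


asyncio.run(main())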