fix(router.py): ensure pre-call RPM check runs even when model is not in the model cost map

Krrish Dholakia 2024-04-11 09:27:26 -07:00
parent 240aaf7af8
commit 84d43484c6
3 changed files with 52 additions and 11 deletions


@@ -5,6 +5,7 @@ model_list:
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
       stream_timeout: 0.001
+      rpm: 10
   - litellm_params:
       model: azure/chatgpt-v-2
       api_base: os.environ/AZURE_API_BASE
@@ -27,6 +28,12 @@ litellm_settings:
   upperbound_key_generate_params:
     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
+router_settings:
+  routing_strategy: usage-based-routing-v2
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
+  enable_pre_call_checks: True
 general_settings:
   master_key: sk-1234
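For reference, the same router_settings expressed programmatically against the Router class (a minimal sketch, assuming the constructor keywords mirror these YAML keys; the deployment entry reuses the fake endpoint from the config above):

    import os

    from litellm import Router

    # Same fake deployment as in the model_list above.
    model_list = [
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "rpm": 10,
            },
        }
    ]

    router = Router(
        model_list=model_list,
        routing_strategy="usage-based-routing-v2",
        redis_host=os.environ["REDIS_HOST"],
        redis_password=os.environ["REDIS_PASSWORD"],
        redis_port=int(os.environ["REDIS_PORT"]),
        enable_pre_call_checks=True,  # turns on pre-call checks such as the RPM check
    )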


@@ -2357,20 +2357,20 @@ class Router:
                         "model", None
                     )
                     model_info = litellm.get_model_info(model=model)
-                except:
-                    continue
-
-                if (
-                    isinstance(model_info, dict)
-                    and model_info.get("max_input_tokens", None) is not None
-                ):
-                    if (
-                        isinstance(model_info["max_input_tokens"], int)
-                        and input_tokens > model_info["max_input_tokens"]
-                    ):
-                        invalid_model_indices.append(idx)
-                        _context_window_error = True
-                        continue
+
+                    if (
+                        isinstance(model_info, dict)
+                        and model_info.get("max_input_tokens", None) is not None
+                    ):
+                        if (
+                            isinstance(model_info["max_input_tokens"], int)
+                            and input_tokens > model_info["max_input_tokens"]
+                        ):
+                            invalid_model_indices.append(idx)
+                            _context_window_error = True
+                            continue
+                except Exception as e:
+                    verbose_router_logger.debug("An error occurs - {}".format(str(e)))
 
             ## RPM CHECK ##
             _litellm_params = deployment.get("litellm_params", {})
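The shape of the fix, as a self-contained sketch (illustrative helper names, not the router's actual code): before, a model missing from the model cost map made litellm.get_model_info raise, and the bare except: continue skipped the rest of the loop body, including the RPM check. Moving the context window check inside the try and downgrading the except to a debug log lets control fall through to the RPM check.

    def filter_deployments(deployments, input_tokens, current_rpm):
        # Stand-in for litellm.get_model_info: raises for models that
        # have no entry in the model cost map.
        def get_model_info(model):
            cost_map = {"gpt-3.5-turbo": {"max_input_tokens": 4096}}
            if model not in cost_map:
                raise ValueError(f"{model} not in model cost map")
            return cost_map[model]

        healthy = []
        for deployment in deployments:
            try:
                info = get_model_info(deployment["model"])
                # Context window check now lives inside the try block.
                if input_tokens > info["max_input_tokens"]:
                    continue
            except Exception as e:
                # Before the fix this was `except: continue`, which also
                # skipped the RPM check whenever get_model_info raised.
                print(f"model info unavailable - {e}")
            # RPM check: now runs even for models outside the cost map.
            if current_rpm >= deployment.get("rpm", float("inf")):
                continue
            healthy.append(deployment)
        return healthy

    # An unknown model with rpm=0 is still filtered out by the RPM check:
    assert filter_deployments(
        [{"model": "my-fake-model", "rpm": 0}], input_tokens=10, current_rpm=0
    ) == []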


@@ -398,6 +398,40 @@ async def test_async_router_context_window_fallback():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
+def test_router_rpm_pre_call_check():
+    """
+    - for a given model not in model cost map
+    - with rpm set
+    - check if rpm check is run
+    """
+    try:
+        model_list = [
+            {
+                "model_name": "fake-openai-endpoint",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "openai/my-fake-model",
+                    "api_key": "my-fake-key",
+                    "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+                    "rpm": 0,
+                },
+            },
+        ]
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore
+        try:
+            router._pre_call_checks(
+                model="fake-openai-endpoint",
+                healthy_deployments=model_list,
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            )
+            pytest.fail("Expected this to fail")
+        except:
+            pass
+    except Exception as e:
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
 def test_router_context_window_check_pre_call_check_in_group():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
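Why the new test expects _pre_call_checks to raise: a fresh deployment has served zero requests, and zero is not below an rpm limit of 0, so the deployment is filtered out, no healthy deployments remain, and the router errors. That can only happen if the RPM check actually ran for a model with no cost map entry. A minimal illustration of that comparison (illustrative names, not LiteLLM internals):

    def passes_rpm_check(current_rpm: int, rpm_limit: int) -> bool:
        # A deployment passes only while its observed requests-per-minute
        # is strictly below its configured limit.
        return current_rpm < rpm_limit

    # Zero requests served against a limit of 0: the deployment still
    # fails, so the check must have been evaluated.
    assert passes_rpm_check(current_rpm=0, rpm_limit=0) is False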