diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index ff950798b3..1bc1d7119e 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -5,6 +5,7 @@ model_list:
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
       stream_timeout: 0.001
+      rpm: 10
   - litellm_params:
       model: azure/chatgpt-v-2
       api_base: os.environ/AZURE_API_BASE
@@ -27,6 +28,12 @@ litellm_settings:
   upperbound_key_generate_params:
     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
 
+router_settings:
+  routing_strategy: usage-based-routing-v2
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
+  enable_pre_call_checks: True
 
 general_settings:
   master_key: sk-1234
diff --git a/litellm/router.py b/litellm/router.py
index 072662baee..37b0ede00a 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2357,20 +2357,20 @@ class Router:
                     "model", None
                 )
                 model_info = litellm.get_model_info(model=model)
-            except:
-                continue
 
-            if (
-                isinstance(model_info, dict)
-                and model_info.get("max_input_tokens", None) is not None
-            ):
                 if (
-                    isinstance(model_info["max_input_tokens"], int)
-                    and input_tokens > model_info["max_input_tokens"]
+                    isinstance(model_info, dict)
+                    and model_info.get("max_input_tokens", None) is not None
                 ):
-                    invalid_model_indices.append(idx)
-                    _context_window_error = True
-                    continue
+                    if (
+                        isinstance(model_info["max_input_tokens"], int)
+                        and input_tokens > model_info["max_input_tokens"]
+                    ):
+                        invalid_model_indices.append(idx)
+                        _context_window_error = True
+                        continue
+            except Exception as e:
+                verbose_router_logger.debug("An error occurred - {}".format(str(e)))
 
             ## RPM CHECK ##
             _litellm_params = deployment.get("litellm_params", {})
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 1a6df38887..7beb1d67c7 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -398,6 +398,40 @@ async def test_async_router_context_window_fallback():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
+def test_router_rpm_pre_call_check():
+    """
+    - for a given model not in the model cost map
+    - with rpm set
+    - check that the rpm check is run
+    """
+    try:
+        model_list = [
+            {
+                "model_name": "fake-openai-endpoint",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "openai/my-fake-model",
+                    "api_key": "my-fake-key",
+                    "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+                    "rpm": 0,
+                },
+            },
+        ]
+
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore
+
+        try:
+            router._pre_call_checks(
+                model="fake-openai-endpoint",
+                healthy_deployments=model_list,
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            )
+            pytest.fail("Expected this to fail")
+        except Exception:  # pytest.fail raises a BaseException subclass, so it is not swallowed here
+            pass
+    except Exception as e:
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
 def test_router_context_window_check_pre_call_check_in_group():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
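Reviewer note on the config changes: `rpm: 10` adds a per-deployment requests-per-minute cap, and the new `router_settings` block turns on `usage-based-routing-v2` with Redis as the shared usage store; `enable_pre_call_checks: True` is what activates the filtering exercised by the new test. Below is a minimal sketch of the equivalent programmatic setup, assuming `Router` accepts the same keys as keyword arguments (which is how the proxy forwards `router_settings`) and that the `REDIS_*` environment variables are set:

```python
import os

from litellm import Router

# Sketch: programmatic equivalent of the YAML `router_settings` block.
# The single deployment mirrors the `fake-openai-endpoint` entry from
# _new_secret_config.yaml, including the new per-deployment `rpm` cap.
router = Router(
    model_list=[
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "rpm": 10,  # requests-per-minute budget for this deployment
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",
    redis_host=os.environ["REDIS_HOST"],  # Redis backs the shared usage counters
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=int(os.environ["REDIS_PORT"]),
    enable_pre_call_checks=True,  # filter deployments before the call is routed
)
```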
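On the `router.py` hunk: previously, a model missing from litellm's model cost map made `get_model_info` raise, and the bare `except: continue` skipped the deployment entirely, so its RPM limit was never checked. Moving the context-window check inside the `try` and merely logging the lookup failure lets execution fall through to the `## RPM CHECK ##` block for unmapped models, which is what `test_router_rpm_pre_call_check` covers. A minimal sketch of that behavior, assuming `_pre_call_checks` raises once every deployment has been filtered out (the exact exception type is not pinned down here, hence the broad `except`):

```python
from litellm import Router

# `openai/my-fake-model` is deliberately absent from the model cost map,
# so the context-window lookup fails; the RPM check must still run.
model_list = [
    {
        "model_name": "fake-openai-endpoint",
        "litellm_params": {
            "model": "openai/my-fake-model",
            "api_key": "my-fake-key",
            "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
            "rpm": 0,  # zero budget: the RPM check should reject this deployment
        },
    }
]

router = Router(model_list=model_list, enable_pre_call_checks=True, num_retries=0)

try:
    router._pre_call_checks(
        model="fake-openai-endpoint",
        healthy_deployments=model_list,
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
    print("BUG: unmapped model bypassed the RPM check")  # old behavior
except Exception as e:
    print(f"RPM pre-call check ran and filtered the deployment: {e}")  # new behavior
```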