fix(router.py): ensure pre-call RPM check runs even when model is not in the model cost map

Krrish Dholakia 2024-04-11 09:27:26 -07:00
parent 240aaf7af8
commit 84d43484c6
3 changed files with 52 additions and 11 deletions


@@ -5,6 +5,7 @@ model_list:
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
       stream_timeout: 0.001
+      rpm: 10
   - litellm_params:
       model: azure/chatgpt-v-2
       api_base: os.environ/AZURE_API_BASE
@@ -27,6 +28,12 @@ litellm_settings:
   upperbound_key_generate_params:
     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
+router_settings:
+  routing_strategy: usage-based-routing-v2
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
+  enable_pre_call_checks: True
 general_settings:
   master_key: sk-1234
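For reference, the same router_settings expressed programmatically against the Router class (a minimal sketch, assuming the constructor keywords mirror these YAML keys; the deployment entry reuses the fake endpoint from the config above):

    import os

    from litellm import Router

    # Same fake deployment as in the model_list above.
    model_list = [
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "rpm": 10,
            },
        }
    ]

    router = Router(
        model_list=model_list,
        routing_strategy="usage-based-routing-v2",
        redis_host=os.environ["REDIS_HOST"],
        redis_password=os.environ["REDIS_PASSWORD"],
        redis_port=int(os.environ["REDIS_PORT"]),
        enable_pre_call_checks=True,  # turns on pre-call checks such as the RPM check
    )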


@@ -2357,20 +2357,20 @@ class Router:
                         "model", None
                     )
                     model_info = litellm.get_model_info(model=model)
-                except:
-                    continue
-
-                if (
-                    isinstance(model_info, dict)
-                    and model_info.get("max_input_tokens", None) is not None
-                ):
-                    if (
-                        isinstance(model_info["max_input_tokens"], int)
-                        and input_tokens > model_info["max_input_tokens"]
-                    ):
-                        invalid_model_indices.append(idx)
-                        _context_window_error = True
-                        continue
+
+                    if (
+                        isinstance(model_info, dict)
+                        and model_info.get("max_input_tokens", None) is not None
+                    ):
+                        if (
+                            isinstance(model_info["max_input_tokens"], int)
+                            and input_tokens > model_info["max_input_tokens"]
+                        ):
+                            invalid_model_indices.append(idx)
+                            _context_window_error = True
+                            continue
+                except Exception as e:
+                    verbose_router_logger.debug("An error occurs - {}".format(str(e)))
 
             ## RPM CHECK ##
             _litellm_params = deployment.get("litellm_params", {})
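The shape of the fix, as a self-contained sketch (illustrative helper names, not the router's actual code): before, a model missing from the model cost map made litellm.get_model_info raise, and the bare except: continue skipped the rest of the loop body, including the RPM check. Moving the context window check inside the try and downgrading the except to a debug log lets control fall through to the RPM check.

    def filter_deployments(deployments, input_tokens, current_rpm):
        # Stand-in for litellm.get_model_info: raises for models that
        # have no entry in the model cost map.
        def get_model_info(model):
            cost_map = {"gpt-3.5-turbo": {"max_input_tokens": 4096}}
            if model not in cost_map:
                raise ValueError(f"{model} not in model cost map")
            return cost_map[model]

        healthy = []
        for deployment in deployments:
            try:
                info = get_model_info(deployment["model"])
                # Context window check now lives inside the try block.
                if input_tokens > info["max_input_tokens"]:
                    continue
            except Exception as e:
                # Before the fix this was `except: continue`, which also
                # skipped the RPM check whenever get_model_info raised.
                print(f"model info unavailable - {e}")
            # RPM check: now runs even for models outside the cost map.
            if current_rpm >= deployment.get("rpm", float("inf")):
                continue
            healthy.append(deployment)
        return healthy

    # An unknown model with rpm=0 is still filtered out by the RPM check:
    assert filter_deployments(
        [{"model": "my-fake-model", "rpm": 0}], input_tokens=10, current_rpm=0
    ) == []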


@@ -398,6 +398,40 @@ async def test_async_router_context_window_fallback():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
+def test_router_rpm_pre_call_check():
+    """
+    - for a given model not in model cost map
+    - with rpm set
+    - check if rpm check is run
+    """
+    try:
+        model_list = [
+            {
+                "model_name": "fake-openai-endpoint",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "openai/my-fake-model",
+                    "api_key": "my-fake-key",
+                    "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+                    "rpm": 0,
+                },
+            },
+        ]
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore
+        try:
+            router._pre_call_checks(
+                model="fake-openai-endpoint",
+                healthy_deployments=model_list,
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            )
+            pytest.fail("Expected this to fail")
+        except:
+            pass
+    except Exception as e:
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
 def test_router_context_window_check_pre_call_check_in_group():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
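Why the new test expects _pre_call_checks to raise: a fresh deployment has served zero requests, and zero is not below an rpm limit of 0, so the deployment is filtered out, no healthy deployments remain, and the router errors. That can only happen if the RPM check actually ran for a model with no cost map entry. A minimal illustration of that comparison (illustrative names, not LiteLLM internals):

    def passes_rpm_check(current_rpm: int, rpm_limit: int) -> bool:
        # A deployment passes only while its observed requests-per-minute
        # is strictly below its configured limit.
        return current_rpm < rpm_limit

    # Zero requests served against a limit of 0: the deployment still
    # fails, so the check must have been evaluated.
    assert passes_rpm_check(current_rpm=0, rpm_limit=0) is False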