fix parallel request limiter

This commit is contained in:
Ishaan Jaff 2024-08-17 14:14:12 -07:00
parent 5731287f1b
commit 221e5b829b
2 changed files with 29 additions and 5 deletions

View file

@@ -4201,6 +4201,15 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama2:7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2:13b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
         "litellm_provider": "ollama",
         "mode": "chat"
     },
+    "ollama/llama3:8b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/llama3:70b": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,

View file

@@ -948,8 +948,10 @@ async def test_bad_router_tpm_limit_per_model():
         api_key=_api_key,
         max_parallel_requests=10,
         tpm_limit=10,
-        tpm_limit_per_model={model: 5},
-        rpm_limit_per_model={model: 5},
+        metadata={
+            "model_rpm_limit": {model: 5},
+            "model_tpm_limit": {model: 5},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1026,7 +1028,9 @@ async def test_pre_call_hook_rpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 1},
+        metadata={
+            "model_rpm_limit": {"azure-model": 1},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1096,8 +1100,10 @@ async def test_pre_call_hook_tpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 100},
-        tpm_limit_per_model={"azure-model": 10},
+        metadata={
+            "model_tpm_limit": {"azure-model": 1},
+            "model_rpm_limit": {"azure-model": 100},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)