Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
fix parallel request limiter

commit 221e5b829b (parent 5731287f1b)
2 changed files with 29 additions and 5 deletions
@@ -4201,6 +4201,15 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama2:7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2:13b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
         "litellm_provider": "ollama",
         "mode": "chat"
     },
+    "ollama/llama3:8b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/llama3:70b": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
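The two hunks above add zero-cost context-window entries for ollama/llama2:7b and ollama/llama3:8b to the bundled model price/context-window map. A minimal sketch of reading those entries back, assuming litellm's get_model_info helper and model_cost dict behave as in recent releases (neither name appears in this diff):

```python
# Minimal sketch: look up the ollama entries added above.
# `get_model_info` and `model_cost` are assumptions about litellm's public
# API; only the model names and field values come from the diff.
import litellm

info = litellm.get_model_info(model="ollama/llama3:8b")
print(info["max_tokens"], info["mode"])  # expected: 8192 chat

entry = litellm.model_cost.get("ollama/llama2:7b", {})
print(entry.get("input_cost_per_token"))  # expected: 0.0 (local model, no per-token cost)
```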
@@ -948,8 +948,10 @@ async def test_bad_router_tpm_limit_per_model():
         api_key=_api_key,
         max_parallel_requests=10,
         tpm_limit=10,
-        tpm_limit_per_model={model: 5},
-        rpm_limit_per_model={model: 5},
+        metadata={
+            "model_rpm_limit": {model: 5},
+            "model_tpm_limit": {model: 5},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
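The test updates show the shape of the fix: per-model limits are no longer passed as dedicated tpm_limit_per_model / rpm_limit_per_model fields; the parallel request limiter now reads them from the key's metadata under "model_tpm_limit" and "model_rpm_limit". A sketch of the new construction, assuming the object under test is litellm's UserAPIKeyAuth and the usual proxy imports; only the keyword arguments are taken from the diff:

```python
# Sketch of the call shape the updated test exercises. The class name and
# import paths are assumptions about litellm's proxy code at the time of
# this commit; the keyword arguments mirror the hunk above.
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.utils import ProxyLogging

model = "gpt-3.5-turbo"  # hypothetical value; the test uses a `model` variable defined earlier
user_api_key_dict = UserAPIKeyAuth(
    api_key="sk-test",        # placeholder key
    max_parallel_requests=10,
    tpm_limit=10,             # key-wide tokens-per-minute limit
    metadata={
        # Per-model limits now live under metadata instead of the old
        # tpm_limit_per_model / rpm_limit_per_model keyword arguments.
        "model_rpm_limit": {model: 5},
        "model_tpm_limit": {model: 5},
    },
)

local_cache = DualCache()
pl = ProxyLogging(user_api_key_cache=local_cache)
```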
@@ -1026,7 +1028,9 @@ async def test_pre_call_hook_rpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 1},
+        metadata={
+            "model_rpm_limit": {"azure-model": 1},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
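With "model_rpm_limit": {"azure-model": 1} in the hunk above, the limiter should admit one request for that model and reject the next one inside the same minute window. A self-contained sketch of that check (not litellm's implementation, only the logic implied by the metadata keys):

```python
# Standalone illustration of a per-model RPM check driven by key metadata.
# This is not litellm code; it only mirrors the "model_rpm_limit" value
# used in the test hunk above.
from collections import defaultdict


def allow_request(key_metadata: dict, model: str, rpm_counter: dict) -> bool:
    """Return True if this request stays within the model's RPM budget."""
    limit = key_metadata.get("model_rpm_limit", {}).get(model)
    if limit is None:
        return True  # no per-model limit configured for this model
    if rpm_counter[model] >= limit:
        return False  # over the per-minute budget; the proxy would answer 429
    rpm_counter[model] += 1
    return True


metadata = {"model_rpm_limit": {"azure-model": 1}}
counter = defaultdict(int)  # requests seen in the current minute window
print(allow_request(metadata, "azure-model", counter))  # True  (first request admitted)
print(allow_request(metadata, "azure-model", counter))  # False (second request rejected)
```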
@@ -1096,8 +1100,10 @@ async def test_pre_call_hook_tpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 100},
-        tpm_limit_per_model={"azure-model": 10},
+        metadata={
+            "model_tpm_limit": {"azure-model": 1},
+            "model_rpm_limit": {"azure-model": 100},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)