diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d30270c5c8..2dc846df92 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -4201,6 +4201,15 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama2:7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2:13b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
         "litellm_provider": "ollama",
         "mode": "chat"
     },
+    "ollama/llama3:8b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/llama3:70b": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py
index e6ffa272f7..6b272fa8cf 100644
--- a/litellm/tests/test_parallel_request_limiter.py
+++ b/litellm/tests/test_parallel_request_limiter.py
@@ -948,8 +948,10 @@ async def test_bad_router_tpm_limit_per_model():
         api_key=_api_key,
         max_parallel_requests=10,
         tpm_limit=10,
-        tpm_limit_per_model={model: 5},
-        rpm_limit_per_model={model: 5},
+        metadata={
+            "model_rpm_limit": {model: 5},
+            "model_tpm_limit": {model: 5},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1026,7 +1028,9 @@ async def test_pre_call_hook_rpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 1},
+        metadata={
+            "model_rpm_limit": {"azure-model": 1},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1096,8 +1100,10 @@ async def test_pre_call_hook_tpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 100},
-        tpm_limit_per_model={"azure-model": 10},
+        metadata={
+            "model_tpm_limit": {"azure-model": 1},
+            "model_rpm_limit": {"azure-model": 100},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
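
The test changes above suggest that per-model limits move off the top-level UserAPIKeyAuth kwargs (tpm_limit_per_model / rpm_limit_per_model) and into the key's metadata under "model_tpm_limit" / "model_rpm_limit". Below is a minimal, hypothetical usage sketch of the new shape, assuming the import paths used by these tests (litellm.proxy._types.UserAPIKeyAuth, litellm.proxy.utils.ProxyLogging, litellm.caching.DualCache); the key string and limit values are placeholders for illustration, not part of this diff.

    # Sketch only: assumes the parallel-request limiter reads per-model limits
    # from UserAPIKeyAuth.metadata, as the updated tests do.
    from litellm.caching import DualCache
    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.utils import ProxyLogging

    # Old style (removed by this change):
    #   UserAPIKeyAuth(..., tpm_limit_per_model={"azure-model": 10},
    #                       rpm_limit_per_model={"azure-model": 100})

    # New style: per-model limits live in the key's metadata.
    user_api_key_dict = UserAPIKeyAuth(
        api_key="sk-example",  # placeholder key
        max_parallel_requests=100,
        tpm_limit=900000,
        rpm_limit=100000,
        metadata={
            "model_tpm_limit": {"azure-model": 1},
            "model_rpm_limit": {"azure-model": 100},
        },
    )

    local_cache = DualCache()
    pl = ProxyLogging(user_api_key_cache=local_cache)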