Allow editing model api key + provider on UI (#8406)

* fix(parallel_request_limiter.py): add back parallel request information to max parallel request limiter

Resolves https://github.com/BerriAI/litellm/issues/8392

* test: mark flaky test to handle time-based tracking issues

* feat(model_management_endpoints.py): expose new PATCH `/model/{model_id}/update` endpoint

Allows updating specific values of a model in the db - calling it a PATCH makes it easy for the admin to know only the supplied fields are changed (see the sketch below)

* feat(edit_model_modal.tsx): allow user to update llm provider + api key on the ui

* fix: fix linting error
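
A minimal sketch of how the new PATCH endpoint might be called (the model id, port, admin key, and the `litellm_params` body shape are assumptions for illustration, not confirmed by this commit):

    # Hypothetical call to the new endpoint; "my-model-id", the admin key,
    # and the litellm_params body shape are assumed for illustration.
    import requests

    resp = requests.patch(
        "http://localhost:4000/model/my-model-id/update",
        headers={"Authorization": "Bearer sk-admin-key"},
        # Send only the fields to change - PATCH semantics leave the rest untouched.
        json={"litellm_params": {"api_key": "new-provider-api-key"}},
    )
    resp.raise_for_status()

Because it is a PATCH, omitted fields keep their existing values in the db, so an admin can rotate an api key or switch the provider without re-submitting the full model definition.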
Krish Dholakia 2025-02-08 23:50:47 -08:00 committed by GitHub
parent 0d2e723e95
commit e4411e4815
7 changed files with 285 additions and 11 deletions


@@ -65,7 +65,40 @@ async def test_global_max_parallel_requests():
         )
         pytest.fail("Expected call to fail")
     except Exception as e:
-        pass
+        print(e)
+
+
+@pytest.mark.flaky(retries=6, delay=1)
+@pytest.mark.asyncio
+async def test_key_max_parallel_requests():
+    """
+    Ensure the error str returned contains parallel request information.
+
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/8392
+    """
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
+    local_cache = DualCache()
+    parallel_request_handler = MaxParallelRequestsHandler(
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
+    )
+
+    parallel_limit_reached = False
+    for _ in range(3):
+        try:
+            await parallel_request_handler.async_pre_call_hook(
+                user_api_key_dict=user_api_key_dict,
+                cache=local_cache,
+                data={},
+                call_type="",
+            )
+            await asyncio.sleep(1)
+        except Exception as e:
+            if "current max_parallel_requests" in str(e):
+                parallel_limit_reached = True
+
+    assert parallel_limit_reached
 
 
 @pytest.mark.asyncio