fix: Minor LiteLLM Fixes + Improvements (29/08/2024) (#5436)

* fix(model_checks.py): support returning wildcard models on `/v1/models` Fixes https://github.com/BerriAI/litellm/issues/4903
* fix(bedrock_httpx.py): support calling bedrock via api_base Closes https://github.com/BerriAI/litellm/pull/4587
* fix(litellm_logging.py): only leave last 4 char of gemini key unmasked Fixes https://github.com/BerriAI/litellm/issues/5433
* feat(router.py): support setting 'weight' param for models on router Closes https://github.com/BerriAI/litellm/issues/5410
* test(test_bedrock_completion.py): add unit test for custom api base
* fix(model_checks.py): handle no "/" in model
2025-04-25 18:54:30 +00:00 · 2024-08-29 22:40:25 -07:00 · 2024-08-29 22:40:25 -07:00 · dd7b008161
commit dd7b008161
parent f70b7575d2
12 changed files with 219 additions and 25 deletions
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@ -2342,3 +2342,55 @@ async def test_aaarouter_dynamic_cooldown_message_retry_time(sync_mode):
            assert e.cooldown_time == cooldown_time

        assert exception_raised
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])  # run once via router.completion, once via router.acompletion
+@pytest.mark.asyncio()
+async def test_router_weighted_pick(sync_mode):  # checks the deployment with the larger 'weight' is returned more often
+    router = Router(
+        model_list=[  # two deployments under the same model_name; they differ in weight, mock_response and id
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "weight": 2,  # larger weight than deployment "2" below
+                    "mock_response": "Hello world 1!",  # presumably returned instead of calling a real provider - confirm
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "weight": 1,
+                    "mock_response": "Hello world 2!",
+                },
+                "model_info": {"id": "2"},
+            },
+        ]
+    )
+
+    model_id_1_count = 0  # number of responses attributed to deployment id "1"
+    model_id_2_count = 0  # number of responses attributed to deployment id "2"
+    for _ in range(50):
+        # Make 50 calls; expect model id 1 (weight 2) to be picked more often than model id 2 (weight 1).
+        if sync_mode:
+            response = router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world!"}],
+            )
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world!"}],
+            )
+
+        model_id = int(response._hidden_params["model_id"])  # id of the deployment that served this call, as an int
+
+        if model_id == 1:
+            model_id_1_count += 1
+        elif model_id == 2:
+            model_id_2_count += 1
+        else:
+            raise Exception("invalid model id returned!")  # only ids 1 and 2 are configured above
+    assert model_id_1_count > model_id_2_count  # NOTE(review): if the pick is random, 50 samples may rarely fail this - confirm