fix(router.py): skip setting model_group response headers for now

The current implementation increases Redis cache calls by 3x.
2025-04-26 03:04:13 +00:00 · 2024-09-28 18:40:17 -07:00 · 2024-09-28 18:40:17 -07:00 · 81d6c5e5a5
commit 81d6c5e5a5
parent 5fbcdd8b11
4 changed files with 43 additions and 89 deletions
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -2568,45 +2568,45 @@ def test_model_group_alias(hidden):
        assert len(model_names) == len(_model_list) + 1


-@pytest.mark.parametrize("on_error", [True, False])
-@pytest.mark.asyncio
-async def test_router_response_headers(on_error):
-    router = Router(
-        model_list=[
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "tpm": 100000,
-                    "rpm": 100000,
-                },
-            },
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "tpm": 500,
-                    "rpm": 500,
-                },
-            },
-        ]
-    )
+# @pytest.mark.parametrize("on_error", [True, False])
+# @pytest.mark.asyncio
+# async def test_router_response_headers(on_error):
+#     router = Router(
+#         model_list=[
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "tpm": 100000,
+#                     "rpm": 100000,
+#                 },
+#             },
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "tpm": 500,
+#                     "rpm": 500,
+#                 },
+#             },
+#         ]
+#     )

-    response = await router.acompletion(
-        model="gpt-3.5-turbo",
-        messages=[{"role": "user", "content": "Hello world!"}],
-        mock_testing_rate_limit_error=on_error,
-    )
+#     response = await router.acompletion(
+#         model="gpt-3.5-turbo",
+#         messages=[{"role": "user", "content": "Hello world!"}],
+#         mock_testing_rate_limit_error=on_error,
+#     )

-    response_headers = response._hidden_params["additional_headers"]
+#     response_headers = response._hidden_params["additional_headers"]

-    print(response_headers)
+#     print(response_headers)

-    assert response_headers["x-ratelimit-limit-requests"] == 100500
-    assert int(response_headers["x-ratelimit-remaining-requests"]) > 0
-    assert response_headers["x-ratelimit-limit-tokens"] == 100500
-    assert int(response_headers["x-ratelimit-remaining-tokens"]) > 0
+#     assert response_headers["x-ratelimit-limit-requests"] == 100500
+#     assert int(response_headers["x-ratelimit-remaining-requests"]) > 0
+#     assert response_headers["x-ratelimit-limit-tokens"] == 100500
+#     assert int(response_headers["x-ratelimit-remaining-tokens"]) > 0