fix(router.py): check for context window error when handling 400 status code errors

The missing check was causing proxy context-window fallbacks to not work as expected.
2025-04-27 11:43:54 +00:00 · 2024-03-26 08:07:53 -07:00 · 2024-03-26 08:07:53 -07:00 · 00d27a324d
commit 00d27a324d
parent bf5b55df69
6 changed files with 308 additions and 1943 deletions
--- a/tests/test_fallbacks.py
+++ b/tests/test_fallbacks.py
@ -0,0 +1,45 @@
+# What is this?
+## This tests that the proxy fallbacks work as expected when the prompt exceeds the model's context window
+import pytest
+import asyncio
+import aiohttp
+from large_text import text
+
+
+async def chat_completion(session, key: str, model: str, messages: list):
+    """
+    POST a chat completion request to the proxy and return the parsed JSON body.
+
+    The request goes to http://0.0.0.0:4000/chat/completions using `key` as the
+    bearer token. The raw response body is printed, followed by a blank line,
+    before the status is evaluated.
+
+    Raises:
+        Exception: if the response status is anything other than 200.
+    """
+    request_headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    payload = {"model": model, "messages": messages}
+
+    async with session.post(
+        "http://0.0.0.0:4000/chat/completions",
+        headers=request_headers,
+        json=payload,
+    ) as response:
+        # Dump the raw body first so failures are visible in the test output.
+        body = await response.text()
+        print(body)
+        print()
+
+        if response.status == 200:
+            return await response.json()
+        raise Exception(
+            f"Request did not return a 200 status code: {response.status}"
+        )
+
+
+@pytest.mark.asyncio
+async def test_chat_completion():
+    """
+    Send a chat completion whose prompt is larger than the context window.
+    The call is expected to succeed through the proxy's fallback.
+    """
+    # `text` (from large_text) is used as the oversized system prompt.
+    oversized_messages = [
+        {"role": "system", "content": text},
+        {"role": "user", "content": "Who was Alexander?"},
+    ]
+    async with aiohttp.ClientSession() as session:
+        await chat_completion(
+            session=session,
+            key="sk-1234",
+            model="gpt-3.5-turbo",
+            messages=oversized_messages,
+        )