mirror of https://github.com/BerriAI/litellm.git
fix(proxy/utils.py): tracking db failed writes
This commit is contained in:
parent 1b35736797
commit 4e6a8d09d0

2 changed files with 99 additions and 78 deletions
litellm/proxy/utils.py

@@ -70,15 +70,25 @@ class PrismaClient:
         """
         Update existing data
         """
-        hashed_token = self.hash_token(token=token)
-        data["token"] = hashed_token
-        await self.db.litellm_verificationtoken.update(
-            where={
-                "token": hashed_token
-            },
-            data={**data} # type: ignore
-        )
-        return {"token": token, "data": data}
+        try:
+            hashed_token = self.hash_token(token=token)
+            data["token"] = hashed_token
+            await self.db.litellm_verificationtoken.update(
+                where={
+                    "token": hashed_token
+                },
+                data={**data} # type: ignore
+            )
+            print("\033[91m" + f"DB write succeeded" + "\033[0m")
+            return {"token": token, "data": data}
+        except Exception as e:
+            print()
+            print()
+            print()
+            print("\033[91m" + f"DB write failed: {e}" + "\033[0m")
+            print()
+            print()
+            print()

     async def delete_data(self, tokens: List):
         """
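For reference, a minimal sketch of what the patched `update_data` looks like from the caller's side. The keyword signature and the `prisma_client` instance are inferred from the body above, not shown in this diff:

```python
# Illustrative sketch only: exercising the patched update_data.
# On success it prints "DB write succeeded" (wrapped in red ANSI codes) and
# returns {"token": ..., "data": ...}; on failure the except branch prints
# "DB write failed: <error>" and, since it returns nothing, yields None.
async def demo(prisma_client):
    result = await prisma_client.update_data(
        token="sk-1234",       # illustrative token value
        data={"spend": 0.05},  # illustrative update payload
    )
    if result is None:
        print("write failed and was swallowed by the except branch")

# e.g. asyncio.run(demo(prisma_client)) once a client is connected
```

Note that the new except branch logs and swallows the exception rather than re-raising, so callers only see a failed write through the printed log and the `None` return value.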
@@ -96,8 +106,7 @@ class PrismaClient:
     async def disconnect(self):
         await self.db.disconnect()

-# ### CUSTOM FILE ###
 ### CUSTOM FILE ###
 def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
     try:
         print(f"value: {value}")
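Only `get_instance_fn`'s signature and first lines appear in this hunk. As a hedged sketch of the dotted-path pattern that signature suggests, with a hypothetical `resolve_instance` standing in for the elided body:

```python
# Hypothetical sketch: resolve a "module.attribute" string from the proxy
# config into a live Python object. Not the actual get_instance_fn body,
# which this diff elides.
import importlib
import os
import sys
from typing import Any, Optional

def resolve_instance(value: str, config_file_path: Optional[str] = None) -> Any:
    module_name, _, instance_name = value.rpartition(".")
    if config_file_path is not None:
        # let imports resolve relative to the directory holding the config file
        sys.path.insert(0, os.path.dirname(os.path.abspath(config_file_path)))
    module = importlib.import_module(module_name)
    return getattr(module, instance_name)
```

A call like `resolve_instance("custom_callbacks.proxy_handler_instance", "config.yaml")` (names illustrative) would import `custom_callbacks.py` next to the config file and return its `proxy_handler_instance` object.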
@@ -134,7 +143,6 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
         raise e
-
 ### CALL HOOKS ###

 class CallHooks:
     """
     Allows users to modify the incoming request / output to the proxy, without having to deal with parsing Request body.
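The `CallHooks` body is elided here; only the docstring is visible. A hedged sketch of the kind of hook the docstring promises, with hypothetical method names since none are shown in this diff:

```python
# Hypothetical sketch: method names below are assumptions, not CallHooks' API.
class LoggingCallHooks(CallHooks):
    def pre_call(self, data: dict, call_type: str) -> dict:
        # mutate the already-parsed request body, e.g. pin a deterministic seed
        data.setdefault("seed", 1337)
        return data

    def post_call(self, response, call_type: str):
        # inspect or rewrite the outgoing response
        print(f"{call_type} returned {type(response).__name__}")
        return response
```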
@@ -1,69 +1,82 @@
-# import openai, json
-# client = openai.OpenAI(
-#     api_key="sk-1234",
-#     base_url="http://0.0.0.0:8000"
-# )
+import openai, json, time, asyncio
+client = openai.AsyncOpenAI(
+    api_key="sk-1234",
+    base_url="http://0.0.0.0:8000"
+)

-# super_fake_messages = [
-#     {
-#         "role": "user",
-#         "content": "What's the weather like in San Francisco, Tokyo, and Paris?"
-#     },
-#     {
-#         "content": None,
-#         "role": "assistant",
-#         "tool_calls": [
-#             {
-#                 "id": "1",
-#                 "function": {
-#                     "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
-#                     "name": "get_current_weather"
-#                 },
-#                 "type": "function"
-#             },
-#             {
-#                 "id": "2",
-#                 "function": {
-#                     "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}",
-#                     "name": "get_current_weather"
-#                 },
-#                 "type": "function"
-#             },
-#             {
-#                 "id": "3",
-#                 "function": {
-#                     "arguments": "{\"location\": \"Paris\", \"unit\": \"celsius\"}",
-#                     "name": "get_current_weather"
-#                 },
-#                 "type": "function"
-#             }
-#         ]
-#     },
-#     {
-#         "tool_call_id": "1",
-#         "role": "tool",
-#         "name": "get_current_weather",
-#         "content": "{\"location\": \"San Francisco\", \"temperature\": \"90\", \"unit\": \"celsius\"}"
-#     },
-#     {
-#         "tool_call_id": "2",
-#         "role": "tool",
-#         "name": "get_current_weather",
-#         "content": "{\"location\": \"Tokyo\", \"temperature\": \"30\", \"unit\": \"celsius\"}"
-#     },
-#     {
-#         "tool_call_id": "3",
-#         "role": "tool",
-#         "name": "get_current_weather",
-#         "content": "{\"location\": \"Paris\", \"temperature\": \"50\", \"unit\": \"celsius\"}"
-#     }
-# ]
+super_fake_messages = [
+    {
+        "role": "user",
+        "content": f"What's the weather like in San Francisco, Tokyo, and Paris? {time.time()}"
+    },
+    {
+        "content": None,
+        "role": "assistant",
+        "tool_calls": [
+            {
+                "id": "1",
+                "function": {
+                    "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
+                    "name": "get_current_weather"
+                },
+                "type": "function"
+            },
+            {
+                "id": "2",
+                "function": {
+                    "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}",
+                    "name": "get_current_weather"
+                },
+                "type": "function"
+            },
+            {
+                "id": "3",
+                "function": {
+                    "arguments": "{\"location\": \"Paris\", \"unit\": \"celsius\"}",
+                    "name": "get_current_weather"
+                },
+                "type": "function"
+            }
+        ]
+    },
+    {
+        "tool_call_id": "1",
+        "role": "tool",
+        "name": "get_current_weather",
+        "content": "{\"location\": \"San Francisco\", \"temperature\": \"90\", \"unit\": \"celsius\"}"
+    },
+    {
+        "tool_call_id": "2",
+        "role": "tool",
+        "name": "get_current_weather",
+        "content": "{\"location\": \"Tokyo\", \"temperature\": \"30\", \"unit\": \"celsius\"}"
+    },
+    {
+        "tool_call_id": "3",
+        "role": "tool",
+        "name": "get_current_weather",
+        "content": "{\"location\": \"Paris\", \"temperature\": \"50\", \"unit\": \"celsius\"}"
+    }
+]

-# super_fake_response = client.chat.completions.create(
-#     model="gpt-3.5-turbo",
-#     messages=super_fake_messages,
-#     seed=1337,
-#     stream=False
-# ) # get a new response from the model where it can see the function response
+async def chat_completions():
+    super_fake_response = await client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=super_fake_messages,
+        seed=1337,
+        stream=False
+    ) # get a new response from the model where it can see the function response
+    await asyncio.sleep(1)
+    return super_fake_response

+# print(json.dumps(super_fake_response.model_dump(), indent=4))
+async def loadtest_fn(n = 2000):
+    global num_task_cancelled_errors, exception_counts, chat_completions
+    start = time.time()
+    tasks = [chat_completions() for _ in range(n)]
+    chat_completions = await asyncio.gather(*tasks)
+    successful_completions = [c for c in chat_completions if c is not None]
+    print(n, time.time() - start, len(successful_completions))

+# print(json.dumps(super_fake_response.model_dump(), indent=4))

+asyncio.run(loadtest_fn())
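`loadtest_fn` declares `num_task_cancelled_errors` and `exception_counts` as globals but never populates them, rebinds the `chat_completions` name over the coroutine function once `gather` returns, and a single failed request makes `gather` raise before anything is counted. A hedged variant that tallies failures per exception type instead, assuming the same `client` and `chat_completions` coroutine defined above:

```python
# Sketch: count failures per exception type instead of aborting on the first one.
import asyncio, collections, time

async def loadtest_with_counts(n=2000):
    start = time.time()
    results = await asyncio.gather(
        *(chat_completions() for _ in range(n)),
        return_exceptions=True,  # keep exceptions as values in `results`
    )
    failures = collections.Counter(
        type(r).__name__ for r in results if isinstance(r, Exception)
    )
    ok = [r for r in results if not isinstance(r, Exception)]
    print(f"{n} requests in {time.time() - start:.1f}s, "
          f"{len(ok)} succeeded, failures: {dict(failures)}")

# asyncio.run(loadtest_with_counts())
```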