Merge pull request #4322 from BerriAI/litellm_bug_fix_spend_tracking

[Fix] user field and user_api_key_* is sometimes omitted randomly
2025-04-26 03:04:13 +00:00 · 2024-06-20 21:10:13 -07:00 · 2024-06-20 21:10:13 -07:00 · 558c00931e
commit 558c00931e
parent cd73ea245a f2677620fd
2 changed files with 228 additions and 102 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -802,7 +802,7 @@ async def update_database(
                        + prisma_client.key_list_transactons.get(hashed_token, 0)
                    )
            except Exception as e:
-                verbose_proxy_logger.info(
+                verbose_proxy_logger.error(
                    f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}"
                )
                raise e
@ -936,8 +936,7 @@ async def update_cache(
            f"_update_key_cache: existing spend: {existing_spend_obj}"
        )
        if existing_spend_obj is None:
-            existing_spend = 0
-            existing_spend_obj = LiteLLM_VerificationTokenView(token=token)
+            return
        else:
            existing_spend = existing_spend_obj.spend
        # Calculate the new cost by adding the existing cost and response_cost
@ -1019,27 +1018,16 @@ async def update_cache(
                    continue
                existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
                if existing_spend_obj is None:
-                    # if user does not exist in LiteLLM_UserTable, create a new user
-                    existing_spend = 0
-                    max_user_budget = None
-                    if litellm.max_user_budget is not None:
-                        max_user_budget = litellm.max_user_budget
-                    existing_spend_obj = LiteLLM_UserTable(
-                        user_id=_id,
-                        spend=0,
-                        max_budget=max_user_budget,
-                        user_email=None,
-                    )
+                    # do nothing if there is no cache value
+                    return
                verbose_proxy_logger.debug(
                    f"_update_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
                )
-                if existing_spend_obj is None:
-                    existing_spend = 0
+
+                if isinstance(existing_spend_obj, dict):
+                    existing_spend = existing_spend_obj["spend"]
                else:
-                    if isinstance(existing_spend_obj, dict):
-                        existing_spend = existing_spend_obj["spend"]
-                    else:
-                        existing_spend = existing_spend_obj.spend
+                    existing_spend = existing_spend_obj.spend
                # Calculate the new cost by adding the existing cost and response_cost
                new_spend = existing_spend + response_cost

@ -1057,9 +1045,8 @@ async def update_cache(
                key="{}:spend".format(litellm_proxy_admin_name)
            )
            if global_proxy_spend is None:
-                await user_api_key_cache.async_set_cache(
-                    key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
-                )
+                # do nothing if not in cache
+                return
            elif response_cost is not None and global_proxy_spend is not None:
                increment = global_proxy_spend + response_cost
                await user_api_key_cache.async_set_cache(
@ -1081,19 +1068,8 @@ async def update_cache(
            existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
            if existing_spend_obj is None:
                # if user does not exist in LiteLLM_UserTable, create a new user
-                existing_spend = 0
-                max_user_budget = None
-                max_end_user_budget = None
-                if litellm.max_end_user_budget is not None:
-                    max_end_user_budget = litellm.max_end_user_budget
-                existing_spend_obj = LiteLLM_EndUserTable(
-                    user_id=end_user_id,
-                    spend=0,
-                    blocked=False,
-                    litellm_budget_table=LiteLLM_BudgetTable(
-                        max_budget=max_end_user_budget
-                    ),
-                )
+                # do nothing if end-user not in api key cache
+                return
            verbose_proxy_logger.debug(
                f"_update_end_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
            )
@ -1131,6 +1107,7 @@ async def update_cache(
                await user_api_key_cache.async_get_cache(key=_id)
            )
            if existing_spend_obj is None:
+                # do nothing if team not in api key cache
                return
            verbose_proxy_logger.debug(
                f"_update_team_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@ -19,76 +19,87 @@
 # function to call to generate key - async def new_user(data: NewUserRequest):
 # function to validate a request - async def user_auth(request: Request):

-import sys, os
-import traceback, uuid
+import os
+import sys
+import traceback
+import uuid
+from datetime import datetime
+
 from dotenv import load_dotenv
 from fastapi import Request
 from fastapi.routing import APIRoute
-from datetime import datetime

 load_dotenv()
-import os, io, time
+import io
+import os
+import time

 # this file is to test litellm/proxy

 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest, logging, asyncio
-import litellm, asyncio
+import asyncio
+import logging
+
+import pytest
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    new_user,
+    user_info,
+    user_update,
+)
+from litellm.proxy.management_endpoints.key_management_endpoints import (
+    delete_key_fn,
+    generate_key_fn,
+    generate_key_helper_fn,
+    info_key_fn,
+    update_key_fn,
+)
+from litellm.proxy.management_endpoints.team_endpoints import (
+    new_team,
+    team_info,
+    update_team,
+)
 from litellm.proxy.proxy_server import (
-    user_api_key_auth,
+    LitellmUserRoles,
+    audio_transcriptions,
    chat_completion,
    completion,
    embeddings,
    image_generation,
-    audio_transcriptions,
-    moderations,
    model_list,
-    LitellmUserRoles,
-)
-from litellm.proxy.management_endpoints.key_management_endpoints import (
-    delete_key_fn,
-    info_key_fn,
-    update_key_fn,
-    generate_key_fn,
-    generate_key_helper_fn,
-)
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
-    new_user,
-    user_update,
-    user_info,
-)
-from litellm.proxy.management_endpoints.team_endpoints import (
-    team_info,
-    new_team,
-    update_team,
+    moderations,
+    new_end_user,
+    user_api_key_auth,
 )
 from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
-    spend_user_fn,
    spend_key_fn,
+    spend_user_fn,
    view_spend_logs,
 )
 from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
-from litellm._logging import verbose_proxy_logger

 verbose_proxy_logger.setLevel(level=logging.DEBUG)

+from starlette.datastructures import URL
+
+from litellm.caching import DualCache
 from litellm.proxy._types import (
-    NewUserRequest,
-    GenerateKeyRequest,
    DynamoDBArgs,
-    KeyRequest,
-    UpdateKeyRequest,
    GenerateKeyRequest,
+    KeyRequest,
+    LiteLLM_UpperboundKeyGenerateParams,
+    NewCustomerRequest,
    NewTeamRequest,
+    NewUserRequest,
+    UpdateKeyRequest,
    UpdateTeamRequest,
    UserAPIKeyAuth,
-    LiteLLM_UpperboundKeyGenerateParams,
 )
 from litellm.proxy.utils import DBClient
-from starlette.datastructures import URL
-from litellm.caching import DualCache

 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

@ -435,10 +446,10 @@ def test_call_with_user_over_budget(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@ -502,15 +513,22 @@ def test_call_with_end_user_over_budget(prisma_client):

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
-            request = GenerateKeyRequest()  # create a key with no budget
-            key = await new_user(request)
-            print(key)
+            user = f"ishaan {uuid.uuid4().hex}"
+            request = NewCustomerRequest(
+                user_id=user, max_budget=0.000001
+            )  # create an end user (customer) with a very small max budget
+            await new_end_user(
+                request,
+                user_api_key_dict=UserAPIKeyAuth(
+                    user_role=LitellmUserRoles.PROXY_ADMIN,
+                    api_key="sk-1234",
+                    user_id="1234",
+                ),
+            )

-            generated_key = key.key
-            bearer_token = "Bearer " + generated_key
-            user = f"ishaan {random.randint(0, 10000)}"
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
+            bearer_token = "Bearer sk-1234"

            result = await user_api_key_auth(request=request, api_key=bearer_token)

@ -522,10 +540,10 @@ def test_call_with_end_user_over_budget(prisma_client):
            request.body = return_body

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@ -547,7 +565,7 @@ def test_call_with_end_user_over_budget(prisma_client):
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
-                            "user_api_key": generated_key,
+                            "user_api_key": "sk-1234",
                            "user_api_key_user_id": user,
                        },
                        "proxy_server_request": {
@ -562,7 +580,14 @@ def test_call_with_end_user_over_budget(prisma_client):
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
-            await asyncio.sleep(5)
+
+            await asyncio.sleep(10)
+            await update_spend(
+                prisma_client=prisma_client,
+                db_writer_client=None,
+                proxy_logging_obj=proxy_logging_obj,
+            )
+
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
@ -612,10 +637,10 @@ def test_call_with_proxy_over_budget(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@ -668,9 +693,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
    # 6. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
    import logging

+    from litellm._logging import verbose_proxy_logger
+
    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
@ -693,10 +719,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@ -760,9 +786,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
    )
    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)

-    from litellm._logging import verbose_proxy_logger
    import logging

+    from litellm._logging import verbose_proxy_logger
+
    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
@ -790,10 +817,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@ -1296,14 +1323,15 @@ def test_call_with_key_over_budget(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage
-            from litellm.caching import Cache

            litellm.cache = Cache()
-            import time, uuid
+            import time
+            import uuid

            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"

@ -1376,6 +1404,119 @@ def test_call_with_key_over_budget(prisma_client):
        print(vars(e))


+def test_call_with_key_over_budget_no_cache(prisma_client):
+    # 12. Make a call with a key over budget, expect to fail
+    # ✅  Tests if spend tracking works when the key does not exist in memory
+    # Related to this: https://github.com/BerriAI/litellm/issues/3920
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    try:
+
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest(max_budget=0.00001)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
+            user_id = key.user_id
+            bearer_token = "Bearer " + generated_key
+
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm.proxy.proxy_server import user_api_key_cache
+
+            user_api_key_cache.in_memory_cache.cache_dict = {}
+            setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
+
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
+
+            litellm.cache = Cache()
+            import time
+            import uuid
+
+            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
+
+            resp = ModelResponse(
+                id=request_id,
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "model": "chatgpt-v-2",
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": hash_token(generated_key),
+                            "user_api_key_user_id": user_id,
+                        }
+                    },
+                    "response_cost": 0.00002,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(10)
+            await update_spend(
+                prisma_client=prisma_client,
+                db_writer_client=None,
+                proxy_logging_obj=proxy_logging_obj,
+            )
+            # test spend_log was written and we can read it
+            spend_logs = await view_spend_logs(request_id=request_id)
+
+            print("read spend logs", spend_logs)
+            assert len(spend_logs) == 1
+
+            spend_log = spend_logs[0]
+
+            assert spend_log.request_id == request_id
+            assert spend_log.spend == float("2e-05")
+            assert spend_log.model == "chatgpt-v-2"
+            assert (
+                spend_log.cache_key
+                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
+            )
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail(f"This should have failed!. They key crossed it's budget")
+
+        asyncio.run(test())
+    except Exception as e:
+        # print(f"Error - {str(e)}")
+        traceback.print_exc()
+        if hasattr(e, "message"):
+            error_detail = e.message
+        else:
+            error_detail = str(e)
+        assert "Budget has been exceeded" in error_detail
+        print(vars(e))
+
+
 def test_call_with_key_over_model_budget(prisma_client):
    # 12. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
@ -1413,14 +1554,15 @@ def test_call_with_key_over_model_budget(prisma_client):
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
-            from litellm import ModelResponse, Choices, Message, Usage
-            from litellm.caching import Cache

            litellm.cache = Cache()
-            import time, uuid
+            import time
+            import uuid

            request_id = f"chatcmpl-{uuid.uuid4()}"

@ -1513,11 +1655,13 @@ async def test_call_with_key_never_over_budget(prisma_client):
        print("result from user auth with new key: {result}")

        # update spend using track_cost callback, make 2nd request, it should fail
+        import time
+        import uuid
+
+        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )
-        from litellm import ModelResponse, Choices, Message, Usage
-        import time, uuid

        request_id = f"chatcmpl-{uuid.uuid4()}"

@ -1571,9 +1715,10 @@ async def test_call_with_key_over_budget_stream(prisma_client):
    # 14. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
    import logging

+    from litellm._logging import verbose_proxy_logger
+
    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
@ -1594,11 +1739,13 @@ async def test_call_with_key_over_budget_stream(prisma_client):
        print("result from user auth with new key", result)

        # update spend using track_cost callback, make 2nd request, it should fail
+        import time
+        import uuid
+
+        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )
-        from litellm import ModelResponse, Choices, Message, Usage
-        import time, uuid

        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
        resp = ModelResponse(
@ -1942,13 +2089,13 @@ async def test_key_with_no_permissions(prisma_client):


 async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
-    from litellm import ModelResponse, Choices, Message, Usage
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
    from litellm.proxy.proxy_server import (
        _PROXY_track_cost_callback as track_cost_callback,
    )

-    import uuid
-
    request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
    resp = ModelResponse(
        id=request_id,
@ -1993,8 +2140,10 @@ async def test_proxy_load_test_db(prisma_client):
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    import logging
+    import time
+
    from litellm._logging import verbose_proxy_logger
-    import logging, time

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)