Merge pull request #4322 from BerriAI/litellm_bug_fix_spend_tracking

[Fix] user field and user_api_key_* are sometimes randomly omitted
Ishaan Jaff 2024-06-20 21:10:13 -07:00 committed by GitHub
commit 558c00931e
2 changed files with 228 additions and 102 deletions


@@ -802,7 +802,7 @@ async def update_database(
                + prisma_client.key_list_transactons.get(hashed_token, 0)
            )
        except Exception as e:
-           verbose_proxy_logger.info(
+           verbose_proxy_logger.error(
                f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}"
            )
            raise e
@@ -936,8 +936,7 @@ async def update_cache(
                f"_update_key_cache: existing spend: {existing_spend_obj}"
            )
            if existing_spend_obj is None:
-               existing_spend = 0
-               existing_spend_obj = LiteLLM_VerificationTokenView(token=token)
+               return
            else:
                existing_spend = existing_spend_obj.spend
            # Calculate the new cost by adding the existing cost and response_cost
@@ -1019,27 +1018,16 @@ async def update_cache(
                continue
            existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
            if existing_spend_obj is None:
-               # if user does not exist in LiteLLM_UserTable, create a new user
-               existing_spend = 0
-               max_user_budget = None
-               if litellm.max_user_budget is not None:
-                   max_user_budget = litellm.max_user_budget
-               existing_spend_obj = LiteLLM_UserTable(
-                   user_id=_id,
-                   spend=0,
-                   max_budget=max_user_budget,
-                   user_email=None,
-               )
+               # do nothing if there is no cache value
+               return
            verbose_proxy_logger.debug(
                f"_update_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
            )
-           if existing_spend_obj is None:
-               existing_spend = 0
+           if isinstance(existing_spend_obj, dict):
+               existing_spend = existing_spend_obj["spend"]
            else:
-               if isinstance(existing_spend_obj, dict):
-                   existing_spend = existing_spend_obj["spend"]
-               else:
-                   existing_spend = existing_spend_obj.spend
+               existing_spend = existing_spend_obj.spend
            # Calculate the new cost by adding the existing cost and response_cost
            new_spend = existing_spend + response_cost
@@ -1057,9 +1045,8 @@ async def update_cache(
                key="{}:spend".format(litellm_proxy_admin_name)
            )
            if global_proxy_spend is None:
-               await user_api_key_cache.async_set_cache(
-                   key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
-               )
+               # do nothing if not in cache
+               return
            elif response_cost is not None and global_proxy_spend is not None:
                increment = global_proxy_spend + response_cost
                await user_api_key_cache.async_set_cache(
@@ -1081,19 +1068,8 @@ async def update_cache(
            existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
            if existing_spend_obj is None:
                # if user does not exist in LiteLLM_UserTable, create a new user
-               existing_spend = 0
-               max_user_budget = None
-               max_end_user_budget = None
-               if litellm.max_end_user_budget is not None:
-                   max_end_user_budget = litellm.max_end_user_budget
-               existing_spend_obj = LiteLLM_EndUserTable(
-                   user_id=end_user_id,
-                   spend=0,
-                   blocked=False,
-                   litellm_budget_table=LiteLLM_BudgetTable(
-                       max_budget=max_end_user_budget
-                   ),
-               )
+               # do nothing if end-user not in api key cache
+               return
            verbose_proxy_logger.debug(
                f"_update_end_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
            )
@@ -1131,6 +1107,7 @@ async def update_cache(
                await user_api_key_cache.async_get_cache(key=_id)
            )
            if existing_spend_obj is None:
+               # do nothing if team not in api key cache
                return
            verbose_proxy_logger.debug(
                f"_update_team_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"


@@ -19,76 +19,87 @@
 # function to call to generate key - async def new_user(data: NewUserRequest):
 # function to validate a request - async def user_auth(request: Request):
-import sys, os
-import traceback, uuid
+import os
+import sys
+import traceback
+import uuid
+from datetime import datetime
+
 from dotenv import load_dotenv
 from fastapi import Request
 from fastapi.routing import APIRoute
-from datetime import datetime

 load_dotenv()
-import os, io, time
+import io
+import os
+import time

 # this file is to test litellm/proxy
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest, logging, asyncio
-import litellm, asyncio
+import asyncio
+import logging
+
+import pytest
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    new_user,
+    user_info,
+    user_update,
+)
+from litellm.proxy.management_endpoints.key_management_endpoints import (
+    delete_key_fn,
+    generate_key_fn,
+    generate_key_helper_fn,
+    info_key_fn,
+    update_key_fn,
+)
+from litellm.proxy.management_endpoints.team_endpoints import (
+    new_team,
+    team_info,
+    update_team,
+)
 from litellm.proxy.proxy_server import (
-    user_api_key_auth,
+    LitellmUserRoles,
+    audio_transcriptions,
     chat_completion,
     completion,
     embeddings,
     image_generation,
-    audio_transcriptions,
-    moderations,
     model_list,
-    LitellmUserRoles,
-)
-from litellm.proxy.management_endpoints.key_management_endpoints import (
-    delete_key_fn,
-    info_key_fn,
-    update_key_fn,
-    generate_key_fn,
-    generate_key_helper_fn,
-)
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
-    new_user,
-    user_update,
-    user_info,
-)
-from litellm.proxy.management_endpoints.team_endpoints import (
-    team_info,
-    new_team,
-    update_team,
+    moderations,
+    new_end_user,
+    user_api_key_auth,
 )
 from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
-    spend_user_fn,
     spend_key_fn,
+    spend_user_fn,
     view_spend_logs,
 )
 from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
-from litellm._logging import verbose_proxy_logger

 verbose_proxy_logger.setLevel(level=logging.DEBUG)

+from starlette.datastructures import URL
+
+from litellm.caching import DualCache
 from litellm.proxy._types import (
-    NewUserRequest,
-    GenerateKeyRequest,
     DynamoDBArgs,
-    KeyRequest,
-    UpdateKeyRequest,
     GenerateKeyRequest,
+    KeyRequest,
+    LiteLLM_UpperboundKeyGenerateParams,
+    NewCustomerRequest,
     NewTeamRequest,
+    NewUserRequest,
+    UpdateKeyRequest,
     UpdateTeamRequest,
     UserAPIKeyAuth,
-    LiteLLM_UpperboundKeyGenerateParams,
 )
 from litellm.proxy.utils import DBClient
-from starlette.datastructures import URL
-from litellm.caching import DualCache

 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -435,10 +446,10 @@ def test_call_with_user_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -502,15 +513,22 @@ def test_call_with_end_user_over_budget(prisma_client):
     async def test():
         await litellm.proxy.proxy_server.prisma_client.connect()
-        request = GenerateKeyRequest()  # create a key with no budget
-        key = await new_user(request)
-        print(key)
-
-        generated_key = key.key
-        bearer_token = "Bearer " + generated_key
-        user = f"ishaan {random.randint(0, 10000)}"
+        user = f"ishaan {uuid.uuid4().hex}"
+        request = NewCustomerRequest(
+            user_id=user, max_budget=0.000001
+        )  # create a key with no budget
+        await new_end_user(
+            request,
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role=LitellmUserRoles.PROXY_ADMIN,
+                api_key="sk-1234",
+                user_id="1234",
+            ),
+        )
         request = Request(scope={"type": "http"})
         request._url = URL(url="/chat/completions")

+        bearer_token = "Bearer sk-1234"
         result = await user_api_key_auth(request=request, api_key=bearer_token)
@@ -522,10 +540,10 @@ def test_call_with_end_user_over_budget(prisma_client):
         request.body = return_body

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -547,7 +565,7 @@ def test_call_with_end_user_over_budget(prisma_client):
                 "stream": False,
                 "litellm_params": {
                     "metadata": {
-                        "user_api_key": generated_key,
+                        "user_api_key": "sk-1234",
                         "user_api_key_user_id": user,
                     },
                     "proxy_server_request": {
@@ -562,7 +580,14 @@ def test_call_with_end_user_over_budget(prisma_client):
             start_time=datetime.now(),
             end_time=datetime.now(),
         )
-        await asyncio.sleep(5)
+        await asyncio.sleep(10)
+        await update_spend(
+            prisma_client=prisma_client,
+            db_writer_client=None,
+            proxy_logging_obj=proxy_logging_obj,
+        )
+
         # use generated key to auth in
         result = await user_api_key_auth(request=request, api_key=bearer_token)
         print("result from user auth with new key", result)
@@ -612,10 +637,10 @@ def test_call_with_proxy_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -668,9 +693,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
     # 6. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger

     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -693,10 +719,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -760,9 +786,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
     )
     setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger

     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -790,10 +817,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -1296,14 +1323,15 @@ def test_call_with_key_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache

         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid

         request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
@@ -1376,6 +1404,119 @@ def test_call_with_key_over_budget(prisma_client):
         print(vars(e))


+def test_call_with_key_over_budget_no_cache(prisma_client):
+    # 12. Make a call with a key over budget, expect to fail
+    # ✅ Tests if spend tracking works when the key does not exist in memory
+    # Related to this: https://github.com/BerriAI/litellm/issues/3920
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    try:
+
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest(max_budget=0.00001)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
+            user_id = key.user_id
+            bearer_token = "Bearer " + generated_key
+
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm.proxy.proxy_server import user_api_key_cache
+
+            user_api_key_cache.in_memory_cache.cache_dict = {}
+            setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
+
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
+
+            litellm.cache = Cache()
+            import time
+            import uuid
+
+            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
+
+            resp = ModelResponse(
+                id=request_id,
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "model": "chatgpt-v-2",
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": hash_token(generated_key),
+                            "user_api_key_user_id": user_id,
+                        }
+                    },
+                    "response_cost": 0.00002,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(10)
+            await update_spend(
+                prisma_client=prisma_client,
+                db_writer_client=None,
+                proxy_logging_obj=proxy_logging_obj,
+            )
+            # test spend_log was written and we can read it
+            spend_logs = await view_spend_logs(request_id=request_id)
+
+            print("read spend logs", spend_logs)
+            assert len(spend_logs) == 1
+
+            spend_log = spend_logs[0]
+
+            assert spend_log.request_id == request_id
+            assert spend_log.spend == float("2e-05")
+            assert spend_log.model == "chatgpt-v-2"
+            assert (
+                spend_log.cache_key
+                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
+            )
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail("This should have failed! The key crossed its budget")
+
+        asyncio.run(test())
+    except Exception as e:
+        # print(f"Error - {str(e)}")
+        traceback.print_exc()
+        if hasattr(e, "message"):
+            error_detail = e.message
+        else:
+            error_detail = str(e)
+        assert "Budget has been exceeded" in error_detail
+        print(vars(e))
+
+
 def test_call_with_key_over_model_budget(prisma_client):
     # 12. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
@@ -1413,14 +1554,15 @@ def test_call_with_key_over_model_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache

         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid

         request_id = f"chatcmpl-{uuid.uuid4()}"
@@ -1513,11 +1655,13 @@ async def test_call_with_key_never_over_budget(prisma_client):
     print("result from user auth with new key: {result}")

     # update spend using track_cost callback, make 2nd request, it should fail
+    import time
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
-    from litellm import ModelResponse, Choices, Message, Usage
-    import time, uuid

     request_id = f"chatcmpl-{uuid.uuid4()}"
@@ -1571,9 +1715,10 @@ async def test_call_with_key_over_budget_stream(prisma_client):
     # 14. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger

     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -1594,11 +1739,13 @@ async def test_call_with_key_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        import time
+        import uuid
+
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        import time, uuid

         request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
         resp = ModelResponse(
@@ -1942,13 +2089,13 @@ async def test_key_with_no_permissions(prisma_client):
 async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
-    from litellm import ModelResponse, Choices, Message, Usage
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
-    import uuid

     request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
     resp = ModelResponse(
         id=request_id,
@@ -1993,8 +2140,10 @@ async def test_proxy_load_test_db(prisma_client):
     """
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    import logging
+    import time
+
     from litellm._logging import verbose_proxy_logger
-    import logging, time

     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)