mirror of https://github.com/BerriAI/litellm.git
commit 9b913c7aae (parent beb67dca38)
test - key test_call_with_key_over_budget_no_cache
1 changed file with 191 additions and 58 deletions
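In short: the diff below normalizes the test file's imports (comma-grouped imports are split one per line and import blocks are reordered, in the style isort produces) and adds one new test, test_call_with_key_over_budget_no_cache, which verifies that a key's budget is still enforced when the key is not in the proxy's in-memory cache (see https://github.com/BerriAI/litellm/issues/3920, linked from the test itself).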
@@ -19,76 +19,85 @@
 # function to call to generate key - async def new_user(data: NewUserRequest):
 # function to validate a request - async def user_auth(request: Request):
 
-import sys, os
-import traceback, uuid
+import os
+import sys
+import traceback
+import uuid
+from datetime import datetime
+
 from dotenv import load_dotenv
 from fastapi import Request
 from fastapi.routing import APIRoute
-from datetime import datetime
 
 load_dotenv()
-import os, io, time
+import io
+import os
+import time
 
 # this file is to test litellm/proxy
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest, logging, asyncio
-import litellm, asyncio
+import asyncio
+import logging
+
+import pytest
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    new_user,
+    user_info,
+    user_update,
+)
+from litellm.proxy.management_endpoints.key_management_endpoints import (
+    delete_key_fn,
+    generate_key_fn,
+    generate_key_helper_fn,
+    info_key_fn,
+    update_key_fn,
+)
+from litellm.proxy.management_endpoints.team_endpoints import (
+    new_team,
+    team_info,
+    update_team,
+)
 from litellm.proxy.proxy_server import (
-    user_api_key_auth,
+    LitellmUserRoles,
+    audio_transcriptions,
     chat_completion,
     completion,
     embeddings,
     image_generation,
-    audio_transcriptions,
-    moderations,
     model_list,
-    LitellmUserRoles,
+    moderations,
+    user_api_key_auth,
 )
-from litellm.proxy.management_endpoints.key_management_endpoints import (
-    delete_key_fn,
-    info_key_fn,
-    update_key_fn,
-    generate_key_fn,
-    generate_key_helper_fn,
-)
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
-    new_user,
-    user_update,
-    user_info,
-)
-from litellm.proxy.management_endpoints.team_endpoints import (
-    team_info,
-    new_team,
-    update_team,
-)
 from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
-    spend_user_fn,
     spend_key_fn,
+    spend_user_fn,
     view_spend_logs,
 )
 from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
-from litellm._logging import verbose_proxy_logger
 
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
 
+from starlette.datastructures import URL
+
+from litellm.caching import DualCache
 from litellm.proxy._types import (
-    NewUserRequest,
-    GenerateKeyRequest,
     DynamoDBArgs,
-    KeyRequest,
-    UpdateKeyRequest,
+    GenerateKeyRequest,
+    KeyRequest,
+    LiteLLM_UpperboundKeyGenerateParams,
     NewTeamRequest,
+    NewUserRequest,
+    UpdateKeyRequest,
     UpdateTeamRequest,
     UserAPIKeyAuth,
-    LiteLLM_UpperboundKeyGenerateParams,
 )
 from litellm.proxy.utils import DBClient
-from starlette.datastructures import URL
-from litellm.caching import DualCache
 
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
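Most of the hunk above is mechanical import cleanup with no behavioral change: grouped one-line imports become one import per line, alphabetized. For example:

    import sys, os  # old style: grouped
    # becomes
    import os       # new style: split and sorted
    import sys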
@@ -435,10 +444,10 @@ def test_call_with_user_over_budget(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
 
         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
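The next several hunks repeat one pattern: the litellm imports are re-sorted, and the test then replays a fake completion through _PROXY_track_cost_callback so a follow-up request trips the budget check. Each hunk is truncated right after `resp = ModelResponse(`; the full mock response these tests build (shown in full in the new test added later in this diff) is:

    resp = ModelResponse(
        id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
        choices=[
            Choices(
                finish_reason=None,
                index=0,
                message=Message(
                    content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                    role="assistant",
                ),
            )
        ],
        model="gpt-35-turbo",  # azure always has model written like this
        usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
    )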
@@ -522,10 +531,10 @@ def test_call_with_end_user_over_budget(prisma_client):
         request.body = return_body
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
 
         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -612,10 +621,10 @@ def test_call_with_proxy_over_budget(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
 
         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -668,9 +677,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
     # 6. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger
 
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -693,10 +703,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
 
         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -760,9 +770,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
     )
     setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
 
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger
 
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -790,10 +801,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
 
         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -1296,14 +1307,15 @@ def test_call_with_key_over_budget(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache
 
         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid
 
         request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
 
@@ -1376,6 +1388,119 @@ def test_call_with_key_over_budget(prisma_client):
         print(vars(e))
 
 
+def test_call_with_key_over_budget_no_cache(prisma_client):
+    # 12. Make a call with a key over budget, expect to fail
+    # ✅ Tests if spend tracking works when the key does not exist in memory
+    # Related to this: https://github.com/BerriAI/litellm/issues/3920
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    try:
+
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest(max_budget=0.00001)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
+            user_id = key.user_id
+            bearer_token = "Bearer " + generated_key
+
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm.proxy.proxy_server import user_api_key_cache
+
+            user_api_key_cache.in_memory_cache.cache_dict = {}
+            setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
+
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
+
+            litellm.cache = Cache()
+            import time
+            import uuid
+
+            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
+
+            resp = ModelResponse(
+                id=request_id,
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "model": "chatgpt-v-2",
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": hash_token(generated_key),
+                            "user_api_key_user_id": user_id,
+                        }
+                    },
+                    "response_cost": 0.00002,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(10)
+            await update_spend(
+                prisma_client=prisma_client,
+                db_writer_client=None,
+                proxy_logging_obj=proxy_logging_obj,
+            )
+            # test spend_log was written and we can read it
+            spend_logs = await view_spend_logs(request_id=request_id)
+
+            print("read spend logs", spend_logs)
+            assert len(spend_logs) == 1
+
+            spend_log = spend_logs[0]
+
+            assert spend_log.request_id == request_id
+            assert spend_log.spend == float("2e-05")
+            assert spend_log.model == "chatgpt-v-2"
+            assert (
+                spend_log.cache_key
+                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
+            )
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail("This should have failed! The key crossed its budget.")
+
+        asyncio.run(test())
+    except Exception as e:
+        # print(f"Error - {str(e)}")
+        traceback.print_exc()
+        if hasattr(e, "message"):
+            error_detail = e.message
+        else:
+            error_detail = str(e)
+        assert "Budget has been exceeded" in error_detail
+        print(vars(e))
+
+
 def test_call_with_key_over_model_budget(prisma_client):
     # 12. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
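The heart of the new test is the `user_api_key_cache.in_memory_cache.cache_dict = {}` line: by emptying the in-memory key cache before the second auth call, it forces user_api_key_auth to re-read the key (and its accumulated spend) from the database, so budget enforcement is exercised on the DB path rather than against a cached object. A minimal, self-contained model of that behavior (plain Python, no litellm imports; every name below is illustrative, not litellm's API):

    # stands in for the Prisma-backed key table
    db = {"sk-test": {"max_budget": 0.00001, "spend": 0.0}}
    # stands in for the proxy's in-memory key cache
    cache = {}

    def auth(key: str) -> dict:
        record = cache.get(key) or db[key]  # cache miss falls back to the "DB"
        if record["spend"] >= record["max_budget"]:
            raise Exception("Budget has been exceeded")
        cache[key] = record  # remember the key record for next time
        return record

    def track_cost(key: str, response_cost: float) -> None:
        db[key]["spend"] += response_cost  # like update_spend() flushing spend to the DB

    auth("sk-test")                  # first call: under budget, succeeds
    track_cost("sk-test", 0.00002)   # spend (2e-05) now exceeds max_budget (1e-05)
    cache.clear()                    # the test's cache_dict = {} trick: force a DB read
    try:
        auth("sk-test")
    except Exception as e:
        assert "Budget has been exceeded" in str(e)
        print("budget enforced without cache:", e)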
@@ -1413,14 +1538,15 @@ def test_call_with_key_over_model_budget(prisma_client):
         print("result from user auth with new key", result)
 
         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache
 
         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid
 
         request_id = f"chatcmpl-{uuid.uuid4()}"
 
@@ -1513,11 +1639,13 @@ async def test_call_with_key_never_over_budget(prisma_client):
     print("result from user auth with new key: {result}")
 
     # update spend using track_cost callback, make 2nd request, it should fail
+    import time
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
-    from litellm import ModelResponse, Choices, Message, Usage
-    import time, uuid
 
     request_id = f"chatcmpl-{uuid.uuid4()}"
 
@@ -1571,9 +1699,10 @@ async def test_call_with_key_over_budget_stream(prisma_client):
     # 14. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging
+
+    from litellm._logging import verbose_proxy_logger
 
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -1594,11 +1723,13 @@ async def test_call_with_key_over_budget_stream(prisma_client):
     print("result from user auth with new key", result)
 
     # update spend using track_cost callback, make 2nd request, it should fail
+    import time
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
-    from litellm import ModelResponse, Choices, Message, Usage
-    import time, uuid
 
     request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
     resp = ModelResponse(
@@ -1942,13 +2073,13 @@ async def test_key_with_no_permissions(prisma_client):
 
 
 async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
-    from litellm import ModelResponse, Choices, Message, Usage
-    import uuid
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
+
+    import uuid
 
     request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
     resp = ModelResponse(
         id=request_id,
@@ -1993,8 +2124,10 @@ async def test_proxy_load_test_db(prisma_client):
     """
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    import logging
+    import time
+
     from litellm._logging import verbose_proxy_logger
-    import logging, time
 
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
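To run only the new test locally, something like the following should work (this view does not show the changed file's path, so substitute it; the test also needs a reachable Prisma-backed database, since it calls prisma_client.connect()):

    pytest -s -k test_call_with_key_over_budget_no_cache path/to/changed_test_file.py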