diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f0264a7e80..bc09b8208c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -802,7 +802,7 @@ async def update_database(
                 + prisma_client.key_list_transactons.get(hashed_token, 0)
             )
         except Exception as e:
-            verbose_proxy_logger.info(
+            verbose_proxy_logger.error(
                 f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}"
             )
             raise e
@@ -936,8 +936,7 @@ async def update_cache(
                 f"_update_key_cache: existing spend: {existing_spend_obj}"
             )
             if existing_spend_obj is None:
-                existing_spend = 0
-                existing_spend_obj = LiteLLM_VerificationTokenView(token=token)
+                return
             else:
                 existing_spend = existing_spend_obj.spend
             # Calculate the new cost by adding the existing cost and response_cost
@@ -1019,27 +1018,16 @@ async def update_cache(
                     continue
                 existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
                 if existing_spend_obj is None:
-                    # if user does not exist in LiteLLM_UserTable, create a new user
-                    existing_spend = 0
-                    max_user_budget = None
-                    if litellm.max_user_budget is not None:
-                        max_user_budget = litellm.max_user_budget
-                    existing_spend_obj = LiteLLM_UserTable(
-                        user_id=_id,
-                        spend=0,
-                        max_budget=max_user_budget,
-                        user_email=None,
-                    )
+                    # do nothing if there is no cache value
+                    return
                 verbose_proxy_logger.debug(
                     f"_update_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
                 )
-                if existing_spend_obj is None:
-                    existing_spend = 0
+
+                if isinstance(existing_spend_obj, dict):
+                    existing_spend = existing_spend_obj["spend"]
                 else:
-                    if isinstance(existing_spend_obj, dict):
-                        existing_spend = existing_spend_obj["spend"]
-                    else:
-                        existing_spend = existing_spend_obj.spend
+                    existing_spend = existing_spend_obj.spend
                 # Calculate the new cost by adding the existing cost and response_cost
                 new_spend = existing_spend + response_cost
@@ -1057,9 +1045,8 @@ async def update_cache(
                     key="{}:spend".format(litellm_proxy_admin_name)
                 )
                 if global_proxy_spend is None:
-                    await user_api_key_cache.async_set_cache(
-                        key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
-                    )
+                    # do nothing if not in cache
+                    return
                 elif response_cost is not None and global_proxy_spend is not None:
                     increment = global_proxy_spend + response_cost
                     await user_api_key_cache.async_set_cache(
@@ -1081,19 +1068,8 @@ async def update_cache(
             existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
             if existing_spend_obj is None:
-                # if user does not exist in LiteLLM_UserTable, create a new user
-                existing_spend = 0
-                max_user_budget = None
-                max_end_user_budget = None
-                if litellm.max_end_user_budget is not None:
-                    max_end_user_budget = litellm.max_end_user_budget
-                existing_spend_obj = LiteLLM_EndUserTable(
-                    user_id=end_user_id,
-                    spend=0,
-                    blocked=False,
-                    litellm_budget_table=LiteLLM_BudgetTable(
-                        max_budget=max_end_user_budget
-                    ),
-                )
+                # do nothing if end-user not in api key cache
+                return
             verbose_proxy_logger.debug(
                 f"_update_end_user_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
             )
@@ -1131,6 +1107,7 @@ async def update_cache(
                         await user_api_key_cache.async_get_cache(key=_id)
                     )
                     if existing_spend_obj is None:
+                        # do nothing if team not in api key cache
                        return
                    verbose_proxy_logger.debug(
                        f"_update_team_db: existing spend: {existing_spend_obj}; response_cost: {response_cost}"
                    )
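
Every `update_cache` change above follows the same pattern: when the key, user, end-user, team, or global-spend entry is missing from the in-memory cache, the function now returns early instead of seeding a zero-spend placeholder (`LiteLLM_VerificationTokenView`, `LiteLLM_UserTable`, `LiteLLM_EndUserTable`) that could mask spend already recorded in the database. Below is a minimal standalone sketch of that pattern, for illustration only; `SpendCache` and `update_user_spend` are hypothetical stand-ins, not litellm internals:

```python
# Minimal sketch of the early-return pattern adopted above.
# SpendCache and update_user_spend are hypothetical stand-ins.
import asyncio
from typing import Optional


class SpendCache:
    """Toy in-memory cache keyed by entity id."""

    def __init__(self) -> None:
        self._store: dict = {}

    async def async_get_cache(self, key: str) -> Optional[dict]:
        return self._store.get(key)

    async def async_set_cache(self, key: str, value: dict) -> None:
        self._store[key] = value


async def update_user_spend(cache: SpendCache, user_id: str, response_cost: float) -> None:
    existing = await cache.async_get_cache(key=user_id)
    if existing is None:
        # Old behavior: fabricate a placeholder with spend=0, which could
        # shadow the real spend already recorded in the database.
        # New behavior: do nothing; the batched DB writer stays the source
        # of truth, and the cache is only incremented when it has a value.
        return
    existing["spend"] = existing.get("spend", 0) + response_cost
    await cache.async_set_cache(key=user_id, value=existing)


if __name__ == "__main__":
    cache = SpendCache()
    # No cache entry: the update is a no-op rather than seeding spend=0.
    asyncio.run(update_user_spend(cache, "user-123", 0.00002))
```

The test changes below exercise exactly this path: spend tracked for a key that is absent from the cache must still reach the database.
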
diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index e1a165a40d..594b4d77c5 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -19,76 +19,87 @@
 # function to call to generate key - async def new_user(data: NewUserRequest):
 # function to validate a request - async def user_auth(request: Request):
-import sys, os
-import traceback, uuid
+import os
+import sys
+import traceback
+import uuid
+from datetime import datetime
+
 from dotenv import load_dotenv
 from fastapi import Request
 from fastapi.routing import APIRoute
-from datetime import datetime

 load_dotenv()
-import os, io, time
+import io
+import os
+import time

 # this file is to test litellm/proxy
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest, logging, asyncio
-import litellm, asyncio
+import asyncio
+import logging
+
+import pytest
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    new_user,
+    user_info,
+    user_update,
+)
+from litellm.proxy.management_endpoints.key_management_endpoints import (
+    delete_key_fn,
+    generate_key_fn,
+    generate_key_helper_fn,
+    info_key_fn,
+    update_key_fn,
+)
+from litellm.proxy.management_endpoints.team_endpoints import (
+    new_team,
+    team_info,
+    update_team,
+)
 from litellm.proxy.proxy_server import (
-    user_api_key_auth,
+    LitellmUserRoles,
+    audio_transcriptions,
     chat_completion,
     completion,
     embeddings,
     image_generation,
-    audio_transcriptions,
-    moderations,
     model_list,
-    LitellmUserRoles,
-)
-from litellm.proxy.management_endpoints.key_management_endpoints import (
-    delete_key_fn,
-    info_key_fn,
-    update_key_fn,
-    generate_key_fn,
-    generate_key_helper_fn,
-)
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
-    new_user,
-    user_update,
-    user_info,
-)
-from litellm.proxy.management_endpoints.team_endpoints import (
-    team_info,
-    new_team,
-    update_team,
+    moderations,
+    new_end_user,
+    user_api_key_auth,
 )
 from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
-    spend_user_fn,
     spend_key_fn,
+    spend_user_fn,
     view_spend_logs,
 )
 from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
-from litellm._logging import verbose_proxy_logger

 verbose_proxy_logger.setLevel(level=logging.DEBUG)
+from starlette.datastructures import URL
+
+from litellm.caching import DualCache
 from litellm.proxy._types import (
-    NewUserRequest,
-    GenerateKeyRequest,
     DynamoDBArgs,
-    KeyRequest,
-    UpdateKeyRequest,
     GenerateKeyRequest,
+    KeyRequest,
+    LiteLLM_UpperboundKeyGenerateParams,
+    NewCustomerRequest,
     NewTeamRequest,
+    NewUserRequest,
+    UpdateKeyRequest,
     UpdateTeamRequest,
     UserAPIKeyAuth,
-    LiteLLM_UpperboundKeyGenerateParams,
 )
 from litellm.proxy.utils import DBClient
-from starlette.datastructures import URL
-from litellm.caching import DualCache

 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -435,10 +446,10 @@ def test_call_with_user_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -502,15 +513,22 @@ def test_call_with_end_user_over_budget(prisma_client):
     async def test():
         await litellm.proxy.proxy_server.prisma_client.connect()
-        request = GenerateKeyRequest()  # create a key with no budget
-        key = await new_user(request)
-        print(key)
+        user = f"ishaan {uuid.uuid4().hex}"
+        request = NewCustomerRequest(
+            user_id=user, max_budget=0.000001
+        )  # create an end user with a very low budget
+        await new_end_user(
+            request,
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role=LitellmUserRoles.PROXY_ADMIN,
+                api_key="sk-1234",
+                user_id="1234",
+            ),
+        )

-        generated_key = key.key
-        bearer_token = "Bearer " + generated_key
-        user = f"ishaan {random.randint(0, 10000)}"
         request = Request(scope={"type": "http"})
         request._url = URL(url="/chat/completions")
+        bearer_token = "Bearer sk-1234"

         result = await user_api_key_auth(request=request, api_key=bearer_token)
@@ -522,10 +540,10 @@ def test_call_with_end_user_over_budget(prisma_client):
         request.body = return_body

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -547,7 +565,7 @@ def test_call_with_end_user_over_budget(prisma_client):
                 "stream": False,
                 "litellm_params": {
                     "metadata": {
-                        "user_api_key": generated_key,
+                        "user_api_key": "sk-1234",
                         "user_api_key_user_id": user,
                     },
                     "proxy_server_request": {
@@ -562,7 +580,14 @@ def test_call_with_end_user_over_budget(prisma_client):
             start_time=datetime.now(),
             end_time=datetime.now(),
         )
-        await asyncio.sleep(5)
+
+        await asyncio.sleep(10)
+        await update_spend(
+            prisma_client=prisma_client,
+            db_writer_client=None,
+            proxy_logging_obj=proxy_logging_obj,
+        )
+
         # use generated key to auth in
         result = await user_api_key_auth(request=request, api_key=bearer_token)
         print("result from user auth with new key", result)
@@ -612,10 +637,10 @@ def test_call_with_proxy_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -668,9 +693,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
     # 6. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging

+    from litellm._logging import verbose_proxy_logger
+
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -693,10 +719,10 @@ def test_call_with_user_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -760,9 +786,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
     )
     setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)

-    from litellm._logging import verbose_proxy_logger
     import logging

+    from litellm._logging import verbose_proxy_logger
+
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -790,10 +817,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage

         resp = ModelResponse(
             id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
@@ -1296,14 +1323,15 @@ def test_call_with_key_over_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache

         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid

         request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
@@ -1376,6 +1404,119 @@ def test_call_with_key_over_budget(prisma_client):
         print(vars(e))


+def test_call_with_key_over_budget_no_cache(prisma_client):
+    # 12. Make a call with a key over budget, expect to fail
+    # ✅ Tests if spend tracking works when the key does not exist in memory
+    # Related to this: https://github.com/BerriAI/litellm/issues/3920
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    try:
+
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest(max_budget=0.00001)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
+            user_id = key.user_id
+            bearer_token = "Bearer " + generated_key
+
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm.proxy.proxy_server import user_api_key_cache
+
+            user_api_key_cache.in_memory_cache.cache_dict = {}
+            setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
+
+            from litellm import Choices, Message, ModelResponse, Usage
+            from litellm.caching import Cache
+
+            litellm.cache = Cache()
+            import time
+            import uuid
+
+            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
+
+            resp = ModelResponse(
+                id=request_id,
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "model": "chatgpt-v-2",
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": hash_token(generated_key),
+                            "user_api_key_user_id": user_id,
+                        }
+                    },
+                    "response_cost": 0.00002,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(10)
+            await update_spend(
+                prisma_client=prisma_client,
+                db_writer_client=None,
+                proxy_logging_obj=proxy_logging_obj,
+            )
+            # test spend_log was written and we can read it
+            spend_logs = await view_spend_logs(request_id=request_id)
+
+            print("read spend logs", spend_logs)
+            assert len(spend_logs) == 1
+
+            spend_log = spend_logs[0]
+
+            assert spend_log.request_id == request_id
+            assert spend_log.spend == float("2e-05")
+            assert spend_log.model == "chatgpt-v-2"
+            assert (
+                spend_log.cache_key
+                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
+            )
+
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail("This should have failed! The key crossed its budget")
+
+        asyncio.run(test())
+    except Exception as e:
+        # print(f"Error - {str(e)}")
+        traceback.print_exc()
+        if hasattr(e, "message"):
+            error_detail = e.message
+        else:
+            error_detail = str(e)
+        assert "Budget has been exceeded" in error_detail
+        print(vars(e))
+
+
 def test_call_with_key_over_model_budget(prisma_client):
     # 12. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
@@ -1413,14 +1554,15 @@ def test_call_with_key_over_model_budget(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm import Choices, Message, ModelResponse, Usage
+        from litellm.caching import Cache
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        from litellm.caching import Cache

         litellm.cache = Cache()
-        import time, uuid
+        import time
+        import uuid

         request_id = f"chatcmpl-{uuid.uuid4()}"
@@ -1513,11 +1655,13 @@ async def test_call_with_key_never_over_budget(prisma_client):
         print("result from user auth with new key: {result}")

         # update spend using track_cost callback, make 2nd request, it should fail
+        import time
+        import uuid
+
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        import time, uuid

         request_id = f"chatcmpl-{uuid.uuid4()}"
@@ -1571,9 +1715,10 @@ async def test_call_with_key_over_budget_stream(prisma_client):
     # 14. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    from litellm._logging import verbose_proxy_logger
     import logging

+    from litellm._logging import verbose_proxy_logger
+
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
@@ -1594,11 +1739,13 @@ async def test_call_with_key_over_budget_stream(prisma_client):
         print("result from user auth with new key", result)

         # update spend using track_cost callback, make 2nd request, it should fail
+        import time
+        import uuid
+
+        from litellm import Choices, Message, ModelResponse, Usage
         from litellm.proxy.proxy_server import (
             _PROXY_track_cost_callback as track_cost_callback,
         )
-        from litellm import ModelResponse, Choices, Message, Usage
-        import time, uuid

         request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
         resp = ModelResponse(
@@ -1942,13 +2089,13 @@ async def test_key_with_no_permissions(prisma_client):


 async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
-    from litellm import ModelResponse, Choices, Message, Usage
+    import uuid
+
+    from litellm import Choices, Message, ModelResponse, Usage
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
-    import uuid
-
     request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
     resp = ModelResponse(
         id=request_id,
@@ -1993,8 +2140,10 @@ async def test_proxy_load_test_db(prisma_client):
     """
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    import logging
+    import time
+
     from litellm._logging import verbose_proxy_logger
-    import logging, time

     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
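
The new `test_call_with_key_over_budget_no_cache` test only passes if spend reaches the database even when the in-memory cache is empty: it wipes `user_api_key_cache.in_memory_cache.cache_dict`, drops `proxy_batch_write_at` to 1 second so the batch writer fires quickly, then sleeps and force-flushes before re-authenticating. A hedged sketch of that flush-then-assert pattern follows; `flush_spend_then_auth` is a hypothetical helper (not part of litellm) assembled from the same calls the test uses:

```python
# Hypothetical helper illustrating the flush-then-assert pattern in the test
# above; assumes the pytest fixtures from this file (prisma_client,
# proxy_logging_obj) and a FastAPI Request already prepared by the caller.
import asyncio

from litellm.proxy.proxy_server import user_api_key_auth
from litellm.proxy.utils import update_spend


async def flush_spend_then_auth(prisma_client, proxy_logging_obj, request, bearer_token):
    # Give the batched writer's window (proxy_batch_write_at) time to elapse,
    # then force-flush the accumulated spend transactions to the database.
    await asyncio.sleep(10)
    await update_spend(
        prisma_client=prisma_client,
        db_writer_client=None,
        proxy_logging_obj=proxy_logging_obj,
    )
    # With the in-memory cache emptied, this auth call has to consult the DB,
    # so a key past its budget should raise "Budget has been exceeded".
    return await user_api_key_auth(request=request, api_key=bearer_token)
```

Because `update_cache` no longer seeds zero-spend placeholders, the budget check falls back to the database rather than a fabricated cache entry, which is exactly the failure mode reported in issue #3920.
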