From ef20536aa06013a4bec176a4210d3989e161760d Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 15:13:59 -0800
Subject: [PATCH 1/6] (Feat) support max_user_budget

---
 litellm/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index 26b761c64a..6a0cb95ae6 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -148,6 +148,7 @@ s3_callback_params: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
 upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
+max_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint

From 33aee6ba836ed29701951b5b3760419ea618d46d Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 15:16:20 -0800
Subject: [PATCH 2/6] (feat) max_user_budget

---
 litellm/proxy/proxy_config.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index a8144e9d48..7d774d9105 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -68,6 +68,7 @@ litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
   max_budget: 10      # global budget for proxy 
+  max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
   budget_duration: 30d    # global budget duration, will reset after 30d
   default_key_generate_params:
     max_budget: 1.5000

From 4de77018cc79e871f409b66fb5d624af4ce1d241 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 15:19:36 -0800
Subject: [PATCH 3/6] (feat) support max_user_budget

---
 litellm/proxy/proxy_server.py | 31 +++++++++++++++++++++++++------
 litellm/proxy/utils.py        | 22 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 427bb88a9c..e97ae734fe 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -91,6 +91,7 @@ from litellm.proxy.utils import (
     reset_budget,
     hash_token,
     html_form,
+    _read_request_body,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic
@@ -370,8 +371,9 @@ async def user_api_key_auth(
             # Run checks for
             # 1. If token can call model
             # 2. If user_id for this token is in budget
-            # 3. If token is expired
-            # 4. If token spend is under Budget for the token
+            # 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
+            # 4. If token is expired
+            # 5. If token spend is under Budget for the token
 
             # Check 1. If token can call model
             litellm.model_alias_map = valid_token.aliases
@@ -430,11 +432,24 @@ async def user_api_key_auth(
                 )
 
             # Check 2. If user_id for this token is in budget
-            ## Check 2.5 If global proxy is in budget
+            ## Check 2.1 If global proxy is in budget
+            ## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
             if valid_token.user_id is not None:
+                user_id_list = [
+                    valid_token.user_id,
+                    litellm_proxy_budget_name,
+                ]
+                if (
+                    litellm.max_user_budget is not None
+                ):  # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
+                    request_data = await _read_request_body(request=request)
+                    user_passed_to_chat_completions = request_data.get("user", None)
+                    if user_passed_to_chat_completions is not None:
+                        user_id_list.append(user_passed_to_chat_completions)
+
                 if prisma_client is not None:
                     user_id_information = await prisma_client.get_data(
-                        user_id_list=[valid_token.user_id, litellm_proxy_budget_name],
+                        user_id_list=user_id_list,
                         table_name="user",
                         query_type="find_all",
                     )
@@ -459,7 +474,7 @@ async def user_api_key_auth(
                             user_current_spend = _user.get("spend", None)
 
                             verbose_proxy_logger.debug(
-                                f"user_max_budget: {user_max_budget}; user_current_spend: {user_current_spend}"
+                                f"user_id: {_user.get('user_id', None)}; user_max_budget: {user_max_budget}; user_current_spend: {user_current_spend}"
                             )
 
                             if (
@@ -852,9 +867,13 @@ async def update_database(
                     f"Updating existing_spend_obj: {existing_spend_obj}"
                 )
                 if existing_spend_obj is None:
+                    # if user does not exist in LiteLLM_UserTable, create a new user
                     existing_spend = 0
+                    max_user_budget = None
+                    if litellm.max_user_budget is not None:
+                        max_user_budget = litellm.max_user_budget
                     existing_spend_obj = LiteLLM_UserTable(
-                        user_id=id, spend=0, max_budget=None, user_email=None
+                        user_id=id, spend=0, max_budget=max_user_budget, user_email=None
                     )
                 else:
                     existing_spend = existing_spend_obj.spend
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 84b09d7265..b28f887eff 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1213,6 +1213,28 @@ async def reset_budget(prisma_client: PrismaClient):
             )
 
 
+async def _read_request_body(request):
+    """
+    Read the request body and parse it into a dict.
+
+    Parameters:
+    - request: object exposing an awaitable `body()` that returns bytes
+
+    Returns:
+    - dict: parsed body; {} when the body is empty or is not a JSON object
+    """
+    import ast, json
+
+    body_str = (await request.body()).decode()
+    if not body_str.strip():
+        return {}  # body-less requests carry no 'user' to budget; don't fail them
+    try:  # JSON first: literal_eval mis-decodes JSON escapes such as "\/"
+        request_data = json.loads(body_str)
+    except ValueError:  # legacy fallback: python-literal bodies (e.g. single quotes)
+        request_data = ast.literal_eval(body_str)
+    return request_data if isinstance(request_data, dict) else {}
+
+
 # LiteLLM Admin UI - Non SSO Login
 html_form = """
 <!DOCTYPE html>

From 3dc85a526370b9146908a77d048151f6645de462 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 15:25:51 -0800
Subject: [PATCH 4/6] (test) track_cost_ for end users

---
 litellm/tests/test_key_generate_prisma.py | 81 +++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index b4c86afb25..d4f405b7b0 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -322,6 +322,87 @@ def test_call_with_user_over_budget(prisma_client):
         print(vars(e))
 
 
+def test_call_with_end_user_over_budget(prisma_client):
+    # Test if a 'user' passed to /chat/completions is tracked & fails when they cross their budget
+    # we only check this when litellm.max_user_budget is set, so it is set here and reset at the end
+    import random
+
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm, "max_user_budget", 0.00001)
+    try:
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest()  # create a key with no budget
+            key = await new_user(request)
+            print(key)
+
+            generated_key = key.key
+            bearer_token = "Bearer " + generated_key
+            user = f"ishaan {random.randint(0, 10000)}"
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            async def return_body():
+                return_string = f'{{"model": "gemini-pro-vision", "user": "{user}"}}'
+                # the auth check reads 'user' from the request body, returned as bytes
+                return return_string.encode()
+
+            request.body = return_body
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm import ModelResponse, Choices, Message, Usage
+
+            resp = ModelResponse(
+                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": generated_key,
+                            "user_api_key_user_id": user,
+                        },
+                        "proxy_server_request": {
+                            "user": user,
+                        },
+                    },
+                    "response_cost": 10,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(5)  # presumably lets the spend update land first - TODO confirm
+            # the end user has now spent more than max_user_budget, so auth must be rejected
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail(f"This should have failed!. They key crossed it's budget")
+
+        asyncio.run(test())
+    except Exception as e:
+        print(vars(e))
+        assert "Authentication Error, ExceededBudget:" in getattr(e, "message", str(e))
+    finally:
+        setattr(litellm, "max_user_budget", None)  # restore the default so later tests skip this check
+
+
 def test_call_with_proxy_over_budget(prisma_client):
     # 5.1 Make a call with a proxy over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)

From cfd46738ed9e531217f1dac664b0bc4b26543281 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 15:39:45 -0800
Subject: [PATCH 5/6] (docs) budget per end_user

---
 docs/my-website/docs/proxy/users.md | 56 +++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/docs/my-website/docs/proxy/users.md b/docs/my-website/docs/proxy/users.md
index c5f2ca358c..baca0188e8 100644
--- a/docs/my-website/docs/proxy/users.md
+++ b/docs/my-website/docs/proxy/users.md
@@ -13,6 +13,7 @@ Requirements:
 You can set budgets at 3 levels: 
 - For the proxy 
 - For a user 
+- For a 'user' passed to `/chat/completions`, `/embeddings` etc
 - For a key
 
 
@@ -117,6 +118,61 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
 --data '{"models": ["azure-models"], "user_id": "krrish3@berri.ai"}'
 ```
 
+</TabItem>
+<TabItem value="per-user-chat" label="For 'user' passed to /chat/completions">
+
+Use this to set a budget for the `user` passed to `/chat/completions`, **without needing to create a key for every user**
+
+**Step 1. Modify config.yaml**
+Define `max_user_budget` under `litellm_settings` (this sets `litellm.max_user_budget`)
+```yaml
+general_settings:
+  master_key: sk-1234
+
+litellm_settings:
+  max_budget: 10      # global budget for proxy 
+  max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
+```
+
+**Step 2. Make a /chat/completions call, pass 'user' - the first call works**
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+        --header 'Content-Type: application/json' \
+        --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \
+        --data ' {
+        "model": "azure-gpt-3.5",
+        "user": "ishaan3",
+        "messages": [
+            {
+            "role": "user",
+            "content": "what time is it"
+            }
+        ]
+        }'
+```
+
+**Step 3. Make a /chat/completions call, pass 'user' - the call fails, since 'ishaan3' is over budget**
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+        --header 'Content-Type: application/json' \
+        --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \
+        --data ' {
+        "model": "azure-gpt-3.5",
+        "user": "ishaan3",
+        "messages": [
+            {
+            "role": "user",
+            "content": "what time is it"
+            }
+        ]
+        }'
+```
+
+Error
+```shell
+{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}
+```
+
 </TabItem>
 <TabItem value="per-key" label="For Key">
 

From 3017377740bb46c35f83a3048e98e13de382b8e8 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 6 Feb 2024 16:08:25 -0800
Subject: [PATCH 6/6] (ci/cd) run again

---
 litellm/tests/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 80a4372a57..6eac2ebf5f 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1743,7 +1743,7 @@ def test_azure_cloudflare_api():
 
 def test_completion_anyscale_2():
     try:
-        # litellm.set_verbose=True
+        # litellm.set_verbose= True
         messages = [
             {"role": "system", "content": "You're a good bot"},
             {