From ef20536aa06013a4bec176a4210d3989e161760d Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 15:13:59 -0800 Subject: [PATCH 1/6] (Feat) support max_user_budget --- litellm/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/__init__.py b/litellm/__init__.py index 26b761c64a..6a0cb95ae6 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -148,6 +148,7 @@ s3_callback_params: Optional[Dict] = None default_key_generate_params: Optional[Dict] = None upperbound_key_generate_params: Optional[Dict] = None default_team_settings: Optional[List] = None +max_user_budget: Optional[float] = None #### RELIABILITY #### request_timeout: Optional[float] = 6000 num_retries: Optional[int] = None # per model endpoint From 33aee6ba836ed29701951b5b3760419ea618d46d Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 15:16:20 -0800 Subject: [PATCH 2/6] (feat) max_user_budget --- litellm/proxy/proxy_config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index a8144e9d48..7d774d9105 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -68,6 +68,7 @@ litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] success_callback: ['langfuse'] max_budget: 10 # global budget for proxy + max_user_budget: 0.0001 budget_duration: 30d # global budget duration, will reset after 30d default_key_generate_params: max_budget: 1.5000 From 4de77018cc79e871f409b66fb5d624af4ce1d241 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 15:19:36 -0800 Subject: [PATCH 3/6] (feat) support max_user_budget --- litellm/proxy/proxy_server.py | 31 +++++++++++++++++++++++++------ litellm/proxy/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 427bb88a9c..e97ae734fe 100644 --- a/litellm/proxy/proxy_server.py +++ 
b/litellm/proxy/proxy_server.py @@ -91,6 +91,7 @@ from litellm.proxy.utils import ( reset_budget, hash_token, html_form, + _read_request_body, ) from litellm.proxy.secret_managers.google_kms import load_google_kms import pydantic @@ -370,8 +371,9 @@ async def user_api_key_auth( # Run checks for # 1. If token can call model # 2. If user_id for this token is in budget - # 3. If token is expired - # 4. If token spend is under Budget for the token + # 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget + # 4. If token is expired + # 5. If token spend is under Budget for the token # Check 1. If token can call model litellm.model_alias_map = valid_token.aliases @@ -430,11 +432,24 @@ async def user_api_key_auth( ) # Check 2. If user_id for this token is in budget - ## Check 2.5 If global proxy is in budget + ## Check 2.1 If global proxy is in budget + ## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget if valid_token.user_id is not None: + user_id_list = [ + valid_token.user_id, + litellm_proxy_budget_name, + ] + if ( + litellm.max_user_budget is not None + ): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set + request_data = await _read_request_body(request=request) + user_passed_to_chat_completions = request_data.get("user", None) + if user_passed_to_chat_completions is not None: + user_id_list.append(user_passed_to_chat_completions) + if prisma_client is not None: user_id_information = await prisma_client.get_data( - user_id_list=[valid_token.user_id, litellm_proxy_budget_name], + user_id_list=user_id_list, table_name="user", query_type="find_all", ) @@ -459,7 +474,7 @@ async def user_api_key_auth( user_current_spend = _user.get("spend", None) verbose_proxy_logger.debug( - f"user_max_budget: {user_max_budget}; user_current_spend: {user_current_spend}" + f"user_id: {_user.get('user_id', None)}; user_max_budget: 
{user_max_budget}; user_current_spend: {user_current_spend}" ) if ( @@ -852,9 +867,13 @@ async def update_database( f"Updating existing_spend_obj: {existing_spend_obj}" ) if existing_spend_obj is None: + # if user does not exist in LiteLLM_UserTable, create a new user existing_spend = 0 + max_user_budget = None + if litellm.max_user_budget is not None: + max_user_budget = litellm.max_user_budget existing_spend_obj = LiteLLM_UserTable( - user_id=id, spend=0, max_budget=None, user_email=None + user_id=id, spend=0, max_budget=max_user_budget, user_email=None ) else: existing_spend = existing_spend_obj.spend diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 84b09d7265..b28f887eff 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1213,6 +1213,28 @@ async def reset_budget(prisma_client: PrismaClient): ) +async def _read_request_body(request): + """ + Read the request body and parse it into a dictionary (Python literal first, then JSON). + + Parameters: + - request: The request object to read the body from + + Returns: + - dict: Parsed request data; {} when the body is empty or is not a mapping (malformed bodies still raise json.JSONDecodeError) + """ + import ast, json + + body_str = (await request.body()).decode() + if not body_str.strip(): + return {} + try: # NOTE(review): literal_eval runs on an untrusted body; kept first so the parsing order is unchanged + request_data = ast.literal_eval(body_str) + except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError): + request_data = json.loads(body_str) + return request_data if isinstance(request_data, dict) else {} + + + # LiteLLM Admin UI - Non SSO Login html_form = """ From 3dc85a526370b9146908a77d048151f6645de462 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 15:25:51 -0800 Subject: [PATCH 4/6] (test) track_cost_ for end users --- litellm/tests/test_key_generate_prisma.py | 81 +++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index b4c86afb25..d4f405b7b0 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -322,6 +322,87 @@ def test_call_with_user_over_budget(prisma_client): print(vars(e)) +def
test_call_with_end_user_over_budget(prisma_client): + # Test if a user passed to /chat/completions is tracked & fails when they cross their budget + # we only check this when litellm.max_user_budget is set; NOTE(review): it is not reset within this test - confirm a fixture restores it + import random + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm, "max_user_budget", 0.00001) + try: + + async def test(): + await litellm.proxy.proxy_server.prisma_client.connect() + request = GenerateKeyRequest() # create a key with no budget + key = await new_user(request) + print(key) + + generated_key = key.key + bearer_token = "Bearer " + generated_key + user = f"ishaan {random.randint(0, 10000)}" + request = Request(scope={"type": "http"}) + request._url = URL(url="/chat/completions") + + async def return_body(): + return_string = f'{{"model": "gemini-pro-vision", "user": "{user}"}}' + # return string as bytes + return return_string.encode() + + request.body = return_body + + # update spend using track_cost callback, make 2nd request, it should fail + from litellm.proxy.proxy_server import ( + _PROXY_track_cost_callback as track_cost_callback, + ) + from litellm import ModelResponse, Choices, Message, Usage + + resp = ModelResponse( + id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", + choices=[ + Choices( + finish_reason=None, + index=0, + message=Message( + content=" Sure! 
Here is a short poem about the sky:\n\nA canvas of blue, a", + role="assistant", + ), + ) + ], + model="gpt-35-turbo", # azure always has model written like this + usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), + ) + await track_cost_callback( + kwargs={ + "stream": False, + "litellm_params": { + "metadata": { + "user_api_key": generated_key, + "user_api_key_user_id": user, + }, + "proxy_server_request": { + "user": user, + }, + }, + "response_cost": 10, + }, + completion_response=resp, + start_time=datetime.now(), + end_time=datetime.now(), + ) + await asyncio.sleep(5) + # authenticate with the generated key; this is expected to raise because the end user is over budget + result = await user_api_key_auth(request=request, api_key=bearer_token) + print("result from user auth with new key", result) + pytest.fail(f"This should have failed!. They key crossed it's budget") + + asyncio.run(test()) + except Exception as e: + error_detail = e.message + assert "Authentication Error, ExceededBudget:" in error_detail + print(vars(e)) + + def test_call_with_proxy_over_budget(prisma_client): # 5.1 Make a call with a proxy over budget, expect to fail setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) From cfd46738ed9e531217f1dac664b0bc4b26543281 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 15:39:45 -0800 Subject: [PATCH 5/6] (docs) budget per end_user --- docs/my-website/docs/proxy/users.md | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/docs/my-website/docs/proxy/users.md b/docs/my-website/docs/proxy/users.md index c5f2ca358c..baca0188e8 100644 --- a/docs/my-website/docs/proxy/users.md +++ b/docs/my-website/docs/proxy/users.md @@ -13,6 +13,7 @@ Requirements: You can set budgets at 3 levels: - For the proxy - For a user +- For a 'user' passed to `/chat/completions`, `/embeddings` etc - For a key @@ -117,6 +118,61 @@ curl --location 'http://0.0.0.0:8000/key/generate' \ --data '{"models": ["azure-models"], "user_id": "krrish3@berri.ai"}' ``` + + + +Use 
this to set a budget for the `user` passed to `/chat/completions`, **without needing to create a key for every user** + +**Step 1. Modify config.yaml** +Define `litellm.max_user_budget` +```yaml +general_settings: + master_key: sk-1234 + +litellm_settings: + max_budget: 10 # global budget for proxy + max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions +``` + +**Step 2. Make a `/chat/completions` call and pass `user` - the first call works** +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \ + --data ' { + "model": "azure-gpt-3.5", + "user": "ishaan3", + "messages": [ + { + "role": "user", + "content": "what time is it" + } + ] + }' +``` + +**Step 3. Make the same `/chat/completions` call again - it fails because `ishaan3` is now over budget** +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \ + --data ' { + "model": "azure-gpt-3.5", + "user": "ishaan3", + "messages": [ + { + "role": "user", + "content": "what time is it" + } + ] + }' +``` + +Expected error response: +```shell +{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. 
Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}% +``` + From 3017377740bb46c35f83a3048e98e13de382b8e8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 16:08:25 -0800 Subject: [PATCH 6/6] (ci/cd) run again --- litellm/tests/test_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 80a4372a57..6eac2ebf5f 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1743,7 +1743,7 @@ def test_azure_cloudflare_api(): def test_completion_anyscale_2(): try: - # litellm.set_verbose=True + # litellm.set_verbose= True messages = [ {"role": "system", "content": "You're a good bot"}, {