diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 42f9e3be5..0e9e43bc9 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -20,6 +20,9 @@ model_list: api_base: os.environ/AZURE_API_BASE input_cost_per_token: 0.0 output_cost_per_token: 0.0 +- model_name: gpt-3.5-turbo + litellm_params: + model: gpt-4o - model_name: bert-classifier litellm_params: model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier @@ -40,6 +43,6 @@ litellm_settings: general_settings: enable_jwt_auth: True disable_reset_budget: True - proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds) + proxy_batch_write_at: 10 # 👈 Frequency of batch writing logs to server (in seconds) routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" alerting: ["slack"] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index aee75d54f..919219845 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1499,68 +1499,6 @@ async def update_database( end_user_id, 0 ) ) - elif custom_db_client is not None: - for id in user_ids: - if id is None: - continue - if ( - custom_db_client is not None - and id != litellm_proxy_budget_name - ): - existing_spend_obj = await custom_db_client.get_data( - key=id, table_name="user" - ) - verbose_proxy_logger.debug( - f"Updating existing_spend_obj: {existing_spend_obj}" - ) - if existing_spend_obj is None: - # if user does not exist in LiteLLM_UserTable, create a new user - existing_spend = 0 - max_user_budget = None - if litellm.max_user_budget is not None: - max_user_budget = litellm.max_user_budget - existing_spend_obj = LiteLLM_UserTable( - user_id=id, - spend=0, - max_budget=max_user_budget, - user_email=None, - ) - else: - existing_spend = existing_spend_obj.spend - - # Calculate the new cost by adding the existing cost and response_cost - existing_spend_obj.spend = existing_spend + response_cost - - # track cost per model, for the given user - spend_per_model = existing_spend_obj.model_spend or {} - current_model = kwargs.get("model") - - if current_model is not None and spend_per_model is not None: - if spend_per_model.get(current_model) is None: - spend_per_model[current_model] = response_cost - else: - spend_per_model[current_model] += response_cost - existing_spend_obj.model_spend = spend_per_model - - valid_token = user_api_key_cache.get_cache(key=id) - if valid_token is not None and isinstance(valid_token, dict): - user_api_key_cache.set_cache( - key=id, value=existing_spend_obj.json() - ) - - verbose_proxy_logger.debug( - f"user - new cost: {existing_spend_obj.spend}, user_id: {id}" - ) - data_list.append(existing_spend_obj) - - if custom_db_client is not None and user_id is not None: - new_spend = data_list[0].spend - await custom_db_client.update_data( - key=user_id, - value={"spend": new_spend}, - table_name="user", - ) - except Exception as e: verbose_proxy_logger.info( "\033[91m" @@ -1580,31 +1518,6 @@ async def update_database( response_cost + prisma_client.key_list_transactons.get(hashed_token, 0) ) - elif custom_db_client is not None: - # Fetch the existing cost for the given token - existing_spend_obj = await custom_db_client.get_data( - key=token, table_name="key" - ) - verbose_proxy_logger.debug( - f"_update_key_db existing spend: {existing_spend_obj}" - ) - if existing_spend_obj is None: - existing_spend = 0 - else: - existing_spend = existing_spend_obj.spend - # Calculate the new cost by adding the existing cost and response_cost - new_spend = existing_spend + response_cost - - verbose_proxy_logger.debug("new cost: %s", new_spend) - # Update the cost column for the given token - await custom_db_client.update_data( - key=token, value={"spend": new_spend}, table_name="key" - ) - - valid_token = user_api_key_cache.get_cache(key=token) - if valid_token is not None: - valid_token.spend = new_spend - user_api_key_cache.set_cache(key=token, value=valid_token) except Exception as e: verbose_proxy_logger.info( f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}" @@ -1622,6 +1535,7 @@ async def update_database( response_obj=completion_response, start_time=start_time, end_time=end_time, + end_user_id=end_user_id, ) payload["spend"] = response_cost @@ -1652,31 +1566,6 @@ async def update_database( response_cost + prisma_client.team_list_transactons.get(team_id, 0) ) - elif custom_db_client is not None: - # Fetch the existing cost for the given token - existing_spend_obj = await custom_db_client.get_data( - key=token, table_name="key" - ) - verbose_proxy_logger.debug( - f"_update_key_db existing spend: {existing_spend_obj}" - ) - if existing_spend_obj is None: - existing_spend = 0 - else: - existing_spend = existing_spend_obj.spend - # Calculate the new cost by adding the existing cost and response_cost - new_spend = existing_spend + response_cost - - verbose_proxy_logger.debug("new cost: %s", new_spend) - # Update the cost column for the given token - await custom_db_client.update_data( - key=token, value={"spend": new_spend}, table_name="key" - ) - - valid_token = user_api_key_cache.get_cache(key=token) - if valid_token is not None: - valid_token.spend = new_spend - user_api_key_cache.set_cache(key=token, value=valid_token) except Exception as e: verbose_proxy_logger.info( f"Update Team DB failed to execute - {str(e)}\n{traceback.format_exc()}" @@ -7032,16 +6921,29 @@ async def new_end_user( return end_user_record -@router.post( +@router.get( "/end_user/info", tags=["End User Management"], dependencies=[Depends(user_api_key_auth)], ) -async def end_user_info(): - """ - [TODO] Needs to be implemented. - """ - pass +async def end_user_info( + end_user_id: str = fastapi.Query( + description="End User ID in the request parameters" + ), +): + global prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + user_info = await prisma_client.db.litellm_endusertable.find_first( + where={"user_id": end_user_id} + ) + + return user_info @router.post( diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index a97864f00..33127e957 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1787,7 +1787,9 @@ def hash_token(token: str): return hashed_token -def get_logging_payload(kwargs, response_obj, start_time, end_time): +def get_logging_payload( + kwargs, response_obj, start_time, end_time, end_user_id: Optional[str] +): from litellm.proxy._types import LiteLLM_SpendLogs from pydantic import Json import uuid @@ -1865,7 +1867,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): "prompt_tokens": usage.get("prompt_tokens", 0), "completion_tokens": usage.get("completion_tokens", 0), "request_tags": metadata.get("tags", []), - "end_user": kwargs.get("user", ""), + "end_user": end_user_id or "", "api_base": litellm_params.get("api_base", ""), } @@ -2028,6 +2030,11 @@ async def update_spend( raise e ### UPDATE END-USER TABLE ### + verbose_proxy_logger.debug( + "End-User Spend transactions: {}".format( + len(prisma_client.end_user_list_transactons.keys()) + ) + ) if len(prisma_client.end_user_list_transactons.keys()) > 0: for i in range(n_retry_times + 1): start_time = time.time() @@ -2043,13 +2050,18 @@ async def update_spend( max_end_user_budget = None if litellm.max_end_user_budget is not None: max_end_user_budget = litellm.max_end_user_budget - new_user_obj = LiteLLM_EndUserTable( - user_id=end_user_id, spend=response_cost, blocked=False - ) - batcher.litellm_endusertable.update_many( + batcher.litellm_endusertable.upsert( where={"user_id": end_user_id}, - data={"spend": {"increment": response_cost}}, + data={ + "create": { + "user_id": end_user_id, + "spend": response_cost, + "blocked": False, + }, + "update": {"spend": {"increment": response_cost}}, + }, ) + prisma_client.end_user_list_transactons = ( {} ) # Clear the remaining transactions after processing all batches in the loop.