mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
feat(proxy_server.py): new /end_user/info endpoint
Get spend for a specific end-user.
This commit is contained in:
parent
f43da3597d
commit
5dcf3d672c
3 changed files with 43 additions and 126 deletions
|
@ -20,6 +20,9 @@ model_list:
|
||||||
api_base: os.environ/AZURE_API_BASE
|
api_base: os.environ/AZURE_API_BASE
|
||||||
input_cost_per_token: 0.0
|
input_cost_per_token: 0.0
|
||||||
output_cost_per_token: 0.0
|
output_cost_per_token: 0.0
|
||||||
|
- model_name: gpt-3.5-turbo
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-4o
|
||||||
- model_name: bert-classifier
|
- model_name: bert-classifier
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
|
model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
|
||||||
|
@ -40,6 +43,6 @@ litellm_settings:
|
||||||
general_settings:
|
general_settings:
|
||||||
enable_jwt_auth: True
|
enable_jwt_auth: True
|
||||||
disable_reset_budget: True
|
disable_reset_budget: True
|
||||||
proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
|
proxy_batch_write_at: 10 # 👈 Frequency of batch writing logs to server (in seconds)
|
||||||
routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
|
routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
|
||||||
alerting: ["slack"]
|
alerting: ["slack"]
|
||||||
|
|
|
@ -1499,68 +1499,6 @@ async def update_database(
|
||||||
end_user_id, 0
|
end_user_id, 0
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
elif custom_db_client is not None:
|
|
||||||
for id in user_ids:
|
|
||||||
if id is None:
|
|
||||||
continue
|
|
||||||
if (
|
|
||||||
custom_db_client is not None
|
|
||||||
and id != litellm_proxy_budget_name
|
|
||||||
):
|
|
||||||
existing_spend_obj = await custom_db_client.get_data(
|
|
||||||
key=id, table_name="user"
|
|
||||||
)
|
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"Updating existing_spend_obj: {existing_spend_obj}"
|
|
||||||
)
|
|
||||||
if existing_spend_obj is None:
|
|
||||||
# if user does not exist in LiteLLM_UserTable, create a new user
|
|
||||||
existing_spend = 0
|
|
||||||
max_user_budget = None
|
|
||||||
if litellm.max_user_budget is not None:
|
|
||||||
max_user_budget = litellm.max_user_budget
|
|
||||||
existing_spend_obj = LiteLLM_UserTable(
|
|
||||||
user_id=id,
|
|
||||||
spend=0,
|
|
||||||
max_budget=max_user_budget,
|
|
||||||
user_email=None,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
existing_spend = existing_spend_obj.spend
|
|
||||||
|
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
|
||||||
existing_spend_obj.spend = existing_spend + response_cost
|
|
||||||
|
|
||||||
# track cost per model, for the given user
|
|
||||||
spend_per_model = existing_spend_obj.model_spend or {}
|
|
||||||
current_model = kwargs.get("model")
|
|
||||||
|
|
||||||
if current_model is not None and spend_per_model is not None:
|
|
||||||
if spend_per_model.get(current_model) is None:
|
|
||||||
spend_per_model[current_model] = response_cost
|
|
||||||
else:
|
|
||||||
spend_per_model[current_model] += response_cost
|
|
||||||
existing_spend_obj.model_spend = spend_per_model
|
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=id)
|
|
||||||
if valid_token is not None and isinstance(valid_token, dict):
|
|
||||||
user_api_key_cache.set_cache(
|
|
||||||
key=id, value=existing_spend_obj.json()
|
|
||||||
)
|
|
||||||
|
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"user - new cost: {existing_spend_obj.spend}, user_id: {id}"
|
|
||||||
)
|
|
||||||
data_list.append(existing_spend_obj)
|
|
||||||
|
|
||||||
if custom_db_client is not None and user_id is not None:
|
|
||||||
new_spend = data_list[0].spend
|
|
||||||
await custom_db_client.update_data(
|
|
||||||
key=user_id,
|
|
||||||
value={"spend": new_spend},
|
|
||||||
table_name="user",
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.info(
|
verbose_proxy_logger.info(
|
||||||
"\033[91m"
|
"\033[91m"
|
||||||
|
@ -1580,31 +1518,6 @@ async def update_database(
|
||||||
response_cost
|
response_cost
|
||||||
+ prisma_client.key_list_transactons.get(hashed_token, 0)
|
+ prisma_client.key_list_transactons.get(hashed_token, 0)
|
||||||
)
|
)
|
||||||
elif custom_db_client is not None:
|
|
||||||
# Fetch the existing cost for the given token
|
|
||||||
existing_spend_obj = await custom_db_client.get_data(
|
|
||||||
key=token, table_name="key"
|
|
||||||
)
|
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"_update_key_db existing spend: {existing_spend_obj}"
|
|
||||||
)
|
|
||||||
if existing_spend_obj is None:
|
|
||||||
existing_spend = 0
|
|
||||||
else:
|
|
||||||
existing_spend = existing_spend_obj.spend
|
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
|
||||||
new_spend = existing_spend + response_cost
|
|
||||||
|
|
||||||
verbose_proxy_logger.debug("new cost: %s", new_spend)
|
|
||||||
# Update the cost column for the given token
|
|
||||||
await custom_db_client.update_data(
|
|
||||||
key=token, value={"spend": new_spend}, table_name="key"
|
|
||||||
)
|
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=token)
|
|
||||||
if valid_token is not None:
|
|
||||||
valid_token.spend = new_spend
|
|
||||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.info(
|
verbose_proxy_logger.info(
|
||||||
f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}"
|
f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}"
|
||||||
|
@ -1622,6 +1535,7 @@ async def update_database(
|
||||||
response_obj=completion_response,
|
response_obj=completion_response,
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
|
end_user_id=end_user_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
payload["spend"] = response_cost
|
payload["spend"] = response_cost
|
||||||
|
@ -1652,31 +1566,6 @@ async def update_database(
|
||||||
response_cost
|
response_cost
|
||||||
+ prisma_client.team_list_transactons.get(team_id, 0)
|
+ prisma_client.team_list_transactons.get(team_id, 0)
|
||||||
)
|
)
|
||||||
elif custom_db_client is not None:
|
|
||||||
# Fetch the existing cost for the given token
|
|
||||||
existing_spend_obj = await custom_db_client.get_data(
|
|
||||||
key=token, table_name="key"
|
|
||||||
)
|
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"_update_key_db existing spend: {existing_spend_obj}"
|
|
||||||
)
|
|
||||||
if existing_spend_obj is None:
|
|
||||||
existing_spend = 0
|
|
||||||
else:
|
|
||||||
existing_spend = existing_spend_obj.spend
|
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
|
||||||
new_spend = existing_spend + response_cost
|
|
||||||
|
|
||||||
verbose_proxy_logger.debug("new cost: %s", new_spend)
|
|
||||||
# Update the cost column for the given token
|
|
||||||
await custom_db_client.update_data(
|
|
||||||
key=token, value={"spend": new_spend}, table_name="key"
|
|
||||||
)
|
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=token)
|
|
||||||
if valid_token is not None:
|
|
||||||
valid_token.spend = new_spend
|
|
||||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.info(
|
verbose_proxy_logger.info(
|
||||||
f"Update Team DB failed to execute - {str(e)}\n{traceback.format_exc()}"
|
f"Update Team DB failed to execute - {str(e)}\n{traceback.format_exc()}"
|
||||||
|
@ -7032,16 +6921,29 @@ async def new_end_user(
|
||||||
return end_user_record
|
return end_user_record
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.get(
|
||||||
"/end_user/info",
|
"/end_user/info",
|
||||||
tags=["End User Management"],
|
tags=["End User Management"],
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
)
|
)
|
||||||
async def end_user_info():
|
async def end_user_info(
|
||||||
"""
|
end_user_id: str = fastapi.Query(
|
||||||
[TODO] Needs to be implemented.
|
description="End User ID in the request parameters"
|
||||||
"""
|
),
|
||||||
pass
|
):
|
||||||
|
global prisma_client
|
||||||
|
|
||||||
|
if prisma_client is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={"error": CommonProxyErrors.db_not_connected_error.value},
|
||||||
|
)
|
||||||
|
|
||||||
|
user_info = await prisma_client.db.litellm_endusertable.find_first(
|
||||||
|
where={"user_id": end_user_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
return user_info
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
|
|
|
@ -1787,7 +1787,9 @@ def hash_token(token: str):
|
||||||
return hashed_token
|
return hashed_token
|
||||||
|
|
||||||
|
|
||||||
def get_logging_payload(kwargs, response_obj, start_time, end_time):
|
def get_logging_payload(
|
||||||
|
kwargs, response_obj, start_time, end_time, end_user_id: Optional[str]
|
||||||
|
):
|
||||||
from litellm.proxy._types import LiteLLM_SpendLogs
|
from litellm.proxy._types import LiteLLM_SpendLogs
|
||||||
from pydantic import Json
|
from pydantic import Json
|
||||||
import uuid
|
import uuid
|
||||||
|
@ -1865,7 +1867,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
|
||||||
"prompt_tokens": usage.get("prompt_tokens", 0),
|
"prompt_tokens": usage.get("prompt_tokens", 0),
|
||||||
"completion_tokens": usage.get("completion_tokens", 0),
|
"completion_tokens": usage.get("completion_tokens", 0),
|
||||||
"request_tags": metadata.get("tags", []),
|
"request_tags": metadata.get("tags", []),
|
||||||
"end_user": kwargs.get("user", ""),
|
"end_user": end_user_id or "",
|
||||||
"api_base": litellm_params.get("api_base", ""),
|
"api_base": litellm_params.get("api_base", ""),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2028,6 +2030,11 @@ async def update_spend(
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
### UPDATE END-USER TABLE ###
|
### UPDATE END-USER TABLE ###
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
"End-User Spend transactions: {}".format(
|
||||||
|
len(prisma_client.end_user_list_transactons.keys())
|
||||||
|
)
|
||||||
|
)
|
||||||
if len(prisma_client.end_user_list_transactons.keys()) > 0:
|
if len(prisma_client.end_user_list_transactons.keys()) > 0:
|
||||||
for i in range(n_retry_times + 1):
|
for i in range(n_retry_times + 1):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
@ -2043,13 +2050,18 @@ async def update_spend(
|
||||||
max_end_user_budget = None
|
max_end_user_budget = None
|
||||||
if litellm.max_end_user_budget is not None:
|
if litellm.max_end_user_budget is not None:
|
||||||
max_end_user_budget = litellm.max_end_user_budget
|
max_end_user_budget = litellm.max_end_user_budget
|
||||||
new_user_obj = LiteLLM_EndUserTable(
|
batcher.litellm_endusertable.upsert(
|
||||||
user_id=end_user_id, spend=response_cost, blocked=False
|
|
||||||
)
|
|
||||||
batcher.litellm_endusertable.update_many(
|
|
||||||
where={"user_id": end_user_id},
|
where={"user_id": end_user_id},
|
||||||
data={"spend": {"increment": response_cost}},
|
data={
|
||||||
|
"create": {
|
||||||
|
"user_id": end_user_id,
|
||||||
|
"spend": response_cost,
|
||||||
|
"blocked": False,
|
||||||
|
},
|
||||||
|
"update": {"spend": {"increment": response_cost}},
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
prisma_client.end_user_list_transactons = (
|
prisma_client.end_user_list_transactons = (
|
||||||
{}
|
{}
|
||||||
) # Clear the remaining transactions after processing all batches in the loop.
|
) # Clear the remaining transactions after processing all batches in the loop.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue