litellm/litellm/proxy/spend_reporting_endpoints/spend_management_endpoints.py
2024-06-15 13:35:21 -07:00

1829 lines
56 KiB
Python

#### SPEND MANAGEMENT #####
from typing import Optional, List
import litellm
from litellm._logging import verbose_proxy_logger
from datetime import datetime, timedelta, timezone
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
from litellm.proxy._types import *
# Router on which the spend / budget tracking endpoints below are registered.
router = APIRouter()
@router.get(
    "/spend/keys",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def spend_key_fn():
    """
    Return every row of the key table.

    NOTE(review): historically described as "ordered by spend"; any ordering
    is decided inside `prisma_client.get_data`, not here - confirm.

    Example Request:
    ```
    curl -X GET "http://0.0.0.0:8000/spend/keys" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
        HTTPException (400): on any failure, including a missing database
        connection. The detail is `{"error": <message>}`.
    """
    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is not None:
            all_keys = await prisma_client.get_data(
                table_name="key", query_type="find_all"
            )
            return all_keys
        raise Exception(
            f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
        )
    except Exception as err:
        # Every failure is surfaced to the client as a 400 with the message.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(err)},
        )
@router.get(
    "/spend/users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def spend_user_fn(
    user_id: Optional[str] = fastapi.Query(
        default=None,
        description="Get User Table row for user_id",
    ),
):
    """
    Return rows of the user table.

    Without `user_id`, the result of a `find_all` lookup is returned as-is.
    With `user_id`, the single `find_unique` result is wrapped in a
    one-element list.

    Example Request:
    ```
    curl -X GET "http://0.0.0.0:8000/spend/users" \
    -H "Authorization: Bearer sk-1234"
    ```

    View User Table row for user_id
    ```
    curl -X GET "http://0.0.0.0:8000/spend/users?user_id=1234" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
        HTTPException (400): on any failure, including a missing database
        connection. The detail is `{"error": <message>}`.
    """
    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # No filter: hand back whatever the "find_all" lookup produced.
        if user_id is None:
            every_user = await prisma_client.get_data(
                table_name="user", query_type="find_all"
            )
            return every_user

        matching_user = await prisma_client.get_data(
            table_name="user", query_type="find_unique", user_id=user_id
        )
        return [matching_user]
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(err)},
        )
@router.get(
    "/spend/tags",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def view_spend_tags(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing key spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view key spend",
    ),
):
    """
    LiteLLM Enterprise - View Spend Per Request Tag

    The work is delegated to `enterprise.utils.get_spend_by_tags`; its result
    is returned unchanged.

    Example Request:
    ```
    curl -X GET "http://0.0.0.0:8000/spend/tags" \
    -H "Authorization: Bearer sk-1234"
    ```

    Spend with Start Date and End Date
    ```
    curl -X GET "http://0.0.0.0:8000/spend/tags?start_date=2022-01-01&end_date=2022-02-01" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
        ProxyException: an `HTTPException` is converted (keeping its detail
        and status code), a `ProxyException` is re-raised untouched, and any
        other error becomes a 500.
    """
    from enterprise.utils import get_spend_by_tags
    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # For reference, the query this endpoint was written around:
        #
        #   SELECT
        #       jsonb_array_elements_text(request_tags) AS individual_request_tag,
        #       COUNT(*) AS log_count,
        #       SUM(spend) AS total_spend
        #   FROM "LiteLLM_SpendLogs"
        #   GROUP BY individual_request_tag;
        spend_by_tag = await get_spend_by_tags(
            start_date=start_date, end_date=end_date, prisma_client=prisma_client
        )
        return spend_by_tag
    except Exception as err:
        if isinstance(err, HTTPException):
            raise ProxyException(
                message=getattr(err, "detail", f"/spend/tags Error({str(err)})"),
                type="internal_error",
                param=getattr(err, "param", "None"),
                code=getattr(err, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        if isinstance(err, ProxyException):
            raise err
        raise ProxyException(
            message="/spend/tags Error" + str(err),
            type="internal_error",
            param=getattr(err, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.get(
    "/global/activity",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
    include_in_schema=False,
)
async def get_global_activity(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of API requests and total tokens through the proxy, per day.

    Both `start_date` and `end_date` are required, formatted `YYYY-MM-DD`.

    Response shape:
    {
        "daily_data": [
            {"date": "Jan 22", "api_requests": 10, "total_tokens": 2000},
            {"date": "Jan 23", "api_requests": 10, "total_tokens": 12}
        ],
        "sum_api_requests": 20,
        "sum_total_tokens": 2012
    }

    `daily_data` is in chronological order. An empty list is returned when
    the database query yields `None`.

    Raises:
        HTTPException (400): a date is missing or malformed, the database is
        not connected, or the query / post-processing fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than an unhandled ValueError.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": f"Invalid date, expected YYYY-MM-DD: {e}"},
        )

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            date_trunc('day', "startTime") AS date,
            COUNT(*) AS api_requests,
            SUM(total_tokens) AS total_tokens
        FROM "LiteLLM_SpendLogs"
        WHERE "startTime" BETWEEN $1::date AND $2::date + interval '1 day'
        GROUP BY date_trunc('day', "startTime")
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )

        if db_response is None:
            return []

        sum_api_requests = 0
        sum_total_tokens = 0
        dated_rows = []
        for row in db_response:
            # Keep the parsed date next to the row so ordering uses the real
            # date rather than the "Jan 22" display label.
            dated_rows.append((datetime.fromisoformat(row["date"]), row))
            # SUM() is NULL when every summed value is NULL; count that as 0.
            sum_api_requests += row.get("api_requests") or 0
            sum_total_tokens += row.get("total_tokens") or 0

        # The query has no ORDER BY, so order chronologically here. Sorting
        # the formatted label would be alphabetical ("Apr" < "Feb" < "Jan").
        dated_rows.sort(key=lambda pair: pair[0])

        daily_data = []
        for _date_obj, row in dated_rows:
            row["date"] = _date_obj.strftime("%b %d")
            daily_data.append(row)

        return {
            "daily_data": daily_data,
            "sum_api_requests": sum_api_requests,
            "sum_total_tokens": sum_total_tokens,
        }
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
@router.get(
    "/global/activity/model",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
    include_in_schema=False,
)
async def get_global_activity_model(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of API requests and total tokens per day, grouped by model.

    Both `start_date` and `end_date` are required, formatted `YYYY-MM-DD`.
    Only the 10 model groups with the most requests are returned, highest
    first. Each model's `daily_data` is in chronological order.

    Response shape:
    [
        {
            "model": "gpt-4",
            "daily_data": [
                {"date": "Jan 22", "api_requests": 10, "total_tokens": 2000},
                {"date": "Jan 23", "api_requests": 10, "total_tokens": 12}
            ],
            "sum_api_requests": 20,
            "sum_total_tokens": 2012
        },
        {
            "model": "azure/gpt-4-turbo",
            "daily_data": [...],
            "sum_api_requests": 20,
            "sum_total_tokens": 2012
        }
    ]

    Each `daily_data` row also carries the `model_group` column from the
    query. An empty list is returned when the query yields `None`.

    Raises:
        HTTPException (400): a date is missing or malformed.
        HTTPException (500): the database is not connected, or the query /
        post-processing fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than an unhandled ValueError.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": f"Invalid date, expected YYYY-MM-DD: {e}"},
        )

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            model_group,
            date_trunc('day', "startTime") AS date,
            COUNT(*) AS api_requests,
            SUM(total_tokens) AS total_tokens
        FROM "LiteLLM_SpendLogs"
        WHERE "startTime" BETWEEN $1::date AND $2::date + interval '1 day'
        GROUP BY model_group, date_trunc('day', "startTime")
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        # {"gpt-4": {"daily_data": [(date, row), ...], "sum_api_requests": 0, "sum_total_tokens": 0}}
        model_ui_data: dict = {}
        for row in db_response:
            _model = row["model_group"]
            if _model not in model_ui_data:
                model_ui_data[_model] = {
                    "daily_data": [],
                    "sum_api_requests": 0,
                    "sum_total_tokens": 0,
                }
            _entry = model_ui_data[_model]
            # Keep the parsed date next to the row so ordering uses the real
            # date rather than the "Jan 22" display label.
            _entry["daily_data"].append((datetime.fromisoformat(row["date"]), row))
            # SUM() is NULL when every summed value is NULL; count that as 0.
            _entry["sum_api_requests"] += row.get("api_requests") or 0
            _entry["sum_total_tokens"] += row.get("total_tokens") or 0

        # Keep only the 10 busiest models, most requests first.
        top_models = sorted(
            model_ui_data.items(),
            key=lambda x: x[1]["sum_api_requests"],
            reverse=True,
        )[:10]

        response = []
        for model, data in top_models:
            # Chronological order; sorting the formatted label would be
            # alphabetical ("Apr" < "Feb" < "Jan").
            daily_data = []
            for _date_obj, row in sorted(data["daily_data"], key=lambda pair: pair[0]):
                row["date"] = _date_obj.strftime("%b %d")
                daily_data.append(row)
            response.append(
                {
                    "model": model,
                    "daily_data": daily_data,
                    "sum_api_requests": data["sum_api_requests"],
                    "sum_total_tokens": data["sum_total_tokens"],
                }
            )
        return response
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
@router.get(
    "/global/activity/exceptions/deployment",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
    include_in_schema=False,
)
async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of 429 errors per day for a model group, grouped by deployment.

    Deployments are identified by their `api_base`. Both `start_date` and
    `end_date` are required, formatted `YYYY-MM-DD`. Only the 10 deployments
    with the most 429 errors are returned, highest first. Each deployment's
    `daily_data` is in chronological order.

    Response shape:
    [
        {
            "api_base": "https://azure-us-east-1.openai.azure.com/",
            "daily_data": [
                {"date": "Jan 22", "num_rate_limit_exceptions": 10},
                {"date": "Jan 23", "num_rate_limit_exceptions": 12}
            ],
            "sum_num_rate_limit_exceptions": 22
        }
    ]

    Each `daily_data` row also carries the `api_base` column from the query.
    An empty list is returned when the query yields `None`.

    Raises:
        HTTPException (400): a date is missing or malformed.
        HTTPException (500): the database is not connected, or the query /
        post-processing fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than an unhandled ValueError.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": f"Invalid date, expected YYYY-MM-DD: {e}"},
        )

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            api_base,
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            api_base,
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )
        if db_response is None:
            return []

        # {"<api_base>": {"daily_data": [(date, row), ...], "sum_num_rate_limit_exceptions": 0}}
        deployment_ui_data: dict = {}
        for row in db_response:
            _api_base = row["api_base"]
            if _api_base not in deployment_ui_data:
                deployment_ui_data[_api_base] = {
                    "daily_data": [],
                    "sum_num_rate_limit_exceptions": 0,
                }
            _entry = deployment_ui_data[_api_base]
            # Keep the parsed date next to the row so ordering uses the real
            # date rather than the "Jan 22" display label.
            _entry["daily_data"].append((datetime.fromisoformat(row["date"]), row))
            _entry["sum_num_rate_limit_exceptions"] += (
                row.get("num_rate_limit_exceptions") or 0
            )

        # Keep only the 10 deployments with the most 429s, worst first.
        top_deployments = sorted(
            deployment_ui_data.items(),
            key=lambda x: x[1]["sum_num_rate_limit_exceptions"],
            reverse=True,
        )[:10]

        response = []
        for api_base, data in top_deployments:
            # Chronological order; sorting the formatted label would be
            # alphabetical ("Apr" < "Feb" < "Jan").
            daily_data = []
            for _date_obj, row in sorted(data["daily_data"], key=lambda pair: pair[0]):
                row["date"] = _date_obj.strftime("%b %d")
                daily_data.append(row)
            response.append(
                {
                    "api_base": api_base,
                    "daily_data": daily_data,
                    "sum_num_rate_limit_exceptions": data[
                        "sum_num_rate_limit_exceptions"
                    ],
                }
            )
        return response
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
@router.get(
    "/global/activity/exceptions",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
    include_in_schema=False,
)
async def get_global_activity_exceptions(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of 429 (rate limit) errors per day for a model group.

    Both `start_date` and `end_date` are required, formatted `YYYY-MM-DD`.

    Response shape:
    {
        "daily_data": [
            {"date": "Jan 22", "num_rate_limit_exceptions": 10},
            {"date": "Jan 23", "num_rate_limit_exceptions": 10}
        ],
        "sum_num_rate_limit_exceptions": 20
    }

    `daily_data` is in chronological order. An empty list is returned when
    the database query yields `None`.

    Raises:
        HTTPException (400): a date is missing or malformed, the database is
        not connected, or the query / post-processing fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than an unhandled ValueError.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": f"Invalid date, expected YYYY-MM-DD: {e}"},
        )

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )

        if db_response is None:
            return []

        sum_num_rate_limit_exceptions = 0
        dated_rows = []
        for row in db_response:
            # Keep the parsed date next to the row so ordering uses the real
            # date rather than the "Jan 22" display label.
            dated_rows.append((datetime.fromisoformat(row["date"]), row))
            sum_num_rate_limit_exceptions += row.get("num_rate_limit_exceptions") or 0

        # Chronological order; sorting the formatted label would be
        # alphabetical ("Apr" < "Feb" < "Jan") and undo the query's ORDER BY.
        dated_rows.sort(key=lambda pair: pair[0])

        daily_data = []
        for _date_obj, row in dated_rows:
            row["date"] = _date_obj.strftime("%b %d")
            daily_data.append(row)

        return {
            "daily_data": daily_data,
            "sum_num_rate_limit_exceptions": sum_num_rate_limit_exceptions,
        }
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
@router.get(
    "/global/spend/provider",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_spend_provider(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get a breakdown of spend per provider.

    Spend is summed per `model_id` in the database; each `model_id` is then
    mapped to a provider through the in-memory router. Both `start_date` and
    `end_date` are required, formatted `YYYY-MM-DD`.

    Response shape:
    [
        {"provider": "Azure OpenAI", "spend": 20},
        {"provider": "OpenAI", "spend": 10},
        {"provider": "VertexAI", "spend": 30}
    ]

    Spend is omitted from the response when its `model_id` cannot be mapped:
    no router is configured, the router has no deployment for that id, or
    provider resolution fails. An empty list is returned when the query
    yields `None`.

    Raises:
        HTTPException (400): a date is missing or malformed, the database is
        not connected, or the query fails.
    """
    from collections import defaultdict

    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than an unhandled ValueError.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": f"Invalid date, expected YYYY-MM-DD: {e}"},
        )

    from litellm.proxy.proxy_server import prisma_client, llm_router

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # NOTE(review): unlike the /global/activity queries, the upper bound
        # here is $2::date without "+ interval '1 day'", so rows after
        # midnight on end_date are excluded - confirm that is intended.
        sql_query = """
        SELECT
            model_id,
            SUM(spend) AS spend
        FROM "LiteLLM_SpendLogs"
        WHERE "startTime" BETWEEN $1::date AND $2::date AND length(model_id) > 0
        GROUP BY model_id
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        ###################################
        # Convert model_id -> to Provider #
        ###################################
        # The in-memory router is the only source for this mapping.
        provider_spend_mapping: defaultdict = defaultdict(int)
        for row in db_response:
            _model_id = row["model_id"]
            if llm_router is None:
                continue
            _deployment = llm_router.get_deployment(model_id=_model_id)
            if _deployment is None:
                continue
            try:
                _, _provider, _, _ = litellm.get_llm_provider(
                    model=_deployment.litellm_params.model,
                    custom_llm_provider=_deployment.litellm_params.custom_llm_provider,
                    api_base=_deployment.litellm_params.api_base,
                    litellm_params=_deployment.litellm_params,
                )
                provider_spend_mapping[_provider] += row["spend"]
            except Exception as e:
                # Best effort: one unresolvable deployment must not fail the
                # whole report, but the skip is logged instead of hidden.
                verbose_proxy_logger.warning(
                    "/global/spend/provider: skipping model_id=%s, could not resolve provider: %s",
                    _model_id,
                    str(e),
                )

        return [
            {"provider": provider, "spend": spend}
            for provider, spend in provider_spend_mapping.items()
        ]
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
@router.get(
    "/global/spend/report",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_spend_report(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
    group_by: Optional[Literal["team", "customer"]] = fastapi.Query(
        default="team",
        description="Group spend by internal team or customer",
    ),
):
    """
    Get daily spend between `start_date` and `end_date` (both `YYYY-MM-DD`,
    both required), grouped by team or by customer.

    With `group_by="team"` each returned row looks like:
    {
        "group_by_day": <day>,
        "teams": [
            {
                "team_name": "team-1",
                "total_spend": 10,
                "metadata": [
                    {"model": "model-1", "api_key": "1213", "spend": 10, "total_tokens": 6000}
                ]
            }
        ]
    }
    Spend logs whose team has no alias are reported as "Unassigned Team".

    With `group_by="customer"` the row has a `customers` list instead, whose
    entries carry `customer` (the spend log's `end_user`) in place of
    `team_name`.

    An empty list is returned when the query yields `None`; nothing is
    returned for any other `group_by` value.

    Raises:
        HTTPException (400): a date is missing, the database is not
        connected, or the query fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
    end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # Both reports work in two stages: the CTE aggregates spend logs per
        # (day, group, model, api_key); the outer query folds those rows into
        # one JSON array per day.
        if group_by == "team":
            report_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    date_trunc('day', sl."startTime") AS group_by_day,
                    COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
                    sl.model,
                    sl.api_key,
                    SUM(sl.spend) AS model_api_spend,
                    SUM(sl.total_tokens) AS model_api_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                LEFT JOIN
                    "LiteLLM_TeamTable" tt
                ON
                    sl.team_id = tt.team_id
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date
                GROUP BY
                    date_trunc('day', sl."startTime"),
                    tt.team_alias,
                    sl.model,
                    sl.api_key
            )
                SELECT
                    group_by_day,
                    jsonb_agg(jsonb_build_object(
                        'team_name', team_name,
                        'total_spend', total_spend,
                        'metadata', metadata
                    )) AS teams
                FROM (
                    SELECT
                        group_by_day,
                        team_name,
                        SUM(model_api_spend) AS total_spend,
                        jsonb_agg(jsonb_build_object(
                            'model', model,
                            'api_key', api_key,
                            'spend', model_api_spend,
                            'total_tokens', model_api_tokens
                        )) AS metadata
                    FROM
                        SpendByModelApiKey
                    GROUP BY
                        group_by_day,
                        team_name
                ) AS aggregated
                GROUP BY
                    group_by_day
                ORDER BY
                    group_by_day;
                """
        elif group_by == "customer":
            report_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    date_trunc('day', sl."startTime") AS group_by_day,
                    sl.end_user AS customer,
                    sl.model,
                    sl.api_key,
                    SUM(sl.spend) AS model_api_spend,
                    SUM(sl.total_tokens) AS model_api_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date
                GROUP BY
                    date_trunc('day', sl."startTime"),
                    customer,
                    sl.model,
                    sl.api_key
            )
            SELECT
                group_by_day,
                jsonb_agg(jsonb_build_object(
                    'customer', customer,
                    'total_spend', total_spend,
                    'metadata', metadata
                )) AS customers
            FROM
                (
                    SELECT
                        group_by_day,
                        customer,
                        SUM(model_api_spend) AS total_spend,
                        jsonb_agg(jsonb_build_object(
                            'model', model,
                            'api_key', api_key,
                            'spend', model_api_spend,
                            'total_tokens', model_api_tokens
                        )) AS metadata
                    FROM
                        SpendByModelApiKey
                    GROUP BY
                        group_by_day,
                        customer
                ) AS aggregated
            GROUP BY
                group_by_day
            ORDER BY
                group_by_day;
            """
        else:
            # Any other grouping produces no report.
            return None

        report_rows = await prisma_client.db.query_raw(
            report_query, start_date_obj, end_date_obj
        )
        return [] if report_rows is None else report_rows
    except Exception as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(err)},
        )
@router.get(
    "/global/spend/all_tag_names",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def global_get_all_tag_names():
    """
    List every distinct request tag found in the spend logs.

    Returns `{"tag_names": [...]}`, or an empty list when the query yields
    `None`.

    Raises:
        ProxyException: an `HTTPException` is converted (keeping its detail
        and status code), a `ProxyException` is re-raised untouched, and any
        other error becomes a 500.
    """
    try:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise Exception(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # request_tags is a JSON array per log row; expand it to one tag per
        # row and de-duplicate.
        distinct_tags_query = """
        SELECT DISTINCT
            jsonb_array_elements_text(request_tags) AS individual_request_tag
        FROM "LiteLLM_SpendLogs";
        """
        tag_rows = await prisma_client.db.query_raw(distinct_tags_query)
        if tag_rows is None:
            return []

        return {
            "tag_names": [tag_row.get("individual_request_tag") for tag_row in tag_rows]
        }
    except Exception as err:
        if isinstance(err, HTTPException):
            raise ProxyException(
                message=getattr(err, "detail", f"/spend/all_tag_names Error({str(err)})"),
                type="internal_error",
                param=getattr(err, "param", "None"),
                code=getattr(err, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        if isinstance(err, ProxyException):
            raise err
        raise ProxyException(
            message="/spend/all_tag_names Error" + str(err),
            type="internal_error",
            param=getattr(err, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.get(
    "/global/spend/tags",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def global_view_spend_tags(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing key spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view key spend",
    ),
    tags: Optional[str] = fastapi.Query(
        default=None,
        description="comman separated tags to filter on",
    ),
):
    """
    LiteLLM Enterprise - View Spend Per Request Tag. Used by LiteLLM UI

    `start_date` and `end_date` are both required. `tags` is an optional
    comma-separated filter that is passed through to
    `enterprise.utils.ui_get_spend_by_tags`, whose result is returned
    unchanged.

    Example Request:
    ```
    curl -X GET "http://0.0.0.0:4000/global/spend/tags?start_date=2022-01-01&end_date=2022-02-01" \
    -H "Authorization: Bearer sk-1234"
    ```

    Filtered to specific tags
    ```
    curl -X GET "http://0.0.0.0:4000/global/spend/tags?start_date=2022-01-01&end_date=2022-02-01&tags=tag-a,tag-b" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
        ProxyException: 400 ("bad_request") when a date is missing. Otherwise
        an `HTTPException` is converted (keeping its detail and status code),
        a `ProxyException` is re-raised untouched, and any other error
        becomes a 500.
    """
    from enterprise.utils import ui_get_spend_by_tags
    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        dates_missing = end_date is None or start_date is None
        if dates_missing:
            raise ProxyException(
                message="Please provide start_date and end_date",
                type="bad_request",
                param=None,
                code=status.HTTP_400_BAD_REQUEST,
            )

        spend_by_tag = await ui_get_spend_by_tags(
            start_date=start_date,
            end_date=end_date,
            tags_str=tags,
            prisma_client=prisma_client,
        )
        return spend_by_tag
    except Exception as err:
        if isinstance(err, HTTPException):
            raise ProxyException(
                message=getattr(err, "detail", f"/spend/tags Error({str(err)})"),
                type="internal_error",
                param=getattr(err, "param", "None"),
                code=getattr(err, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        if isinstance(err, ProxyException):
            raise err
        raise ProxyException(
            message="/spend/tags Error" + str(err),
            type="internal_error",
            param=getattr(err, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
async def _get_spend_report_for_time_range(
    start_date: str,
    end_date: str,
):
    """
    Collect spend per team and spend per request tag for a date range.

    Both bounds are inclusive and are cast to dates inside the SQL.

    Returns:
        A `(spend_per_team, spend_per_tag)` tuple of raw query results, each
        ordered by total spend descending. `None` is returned when no
        database is connected or when either query fails; both cases are
        logged as errors.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        verbose_proxy_logger.error(
            f"Database not connected. Connect a database to your proxy for weekly, monthly spend reports"
        )
        return None

    # Total spend per team alias.
    team_spend_query = """
    SELECT
        t.team_alias,
        SUM(s.spend) AS total_spend
    FROM
        "LiteLLM_SpendLogs" s
    LEFT JOIN
        "LiteLLM_TeamTable" t ON s.team_id = t.team_id
    WHERE
        s."startTime"::DATE >= $1::date AND s."startTime"::DATE <= $2::date
    GROUP BY
        t.team_alias
    ORDER BY
        total_spend DESC;
    """

    # Total spend per individual request tag.
    tag_spend_query = """
    SELECT
        jsonb_array_elements_text(request_tags) AS individual_request_tag,
        SUM(spend) AS total_spend
    FROM "LiteLLM_SpendLogs"
    WHERE "startTime"::DATE >= $1::date AND "startTime"::DATE <= $2::date
    GROUP BY individual_request_tag
    ORDER BY total_spend DESC;
    """

    try:
        spend_per_team = await prisma_client.db.query_raw(
            team_spend_query, start_date, end_date
        )
        spend_per_tag = await prisma_client.db.query_raw(
            tag_spend_query, start_date, end_date
        )
        return spend_per_team, spend_per_tag
    except Exception as err:
        verbose_proxy_logger.error(
            "Exception in _get_daily_spend_reports {}".format(str(err))
        )  # noqa
    return None
@router.post(
    "/spend/calculate",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {
            "cost": {
                "description": "The calculated cost",
                "example": 0.0,
                "type": "float",
            }
        }
    },
)
async def calculate_spend(request: Request):
    """
    Accepts all the params of completion_cost.

    The JSON request body is forwarded as keyword arguments to
    `litellm.completion_cost`. If it contains `completion_response`, that
    value is first converted into a `litellm.ModelResponse`.

    Returns `{"cost": <calculated cost>}`.

    Calculate spend **before** making call:

    Note: If you see a spend of $0.0 you need to set custom_pricing for your model: https://docs.litellm.ai/docs/proxy/custom_pricing

    ```
    curl --location 'http://localhost:4000/spend/calculate'
    --header 'Authorization: Bearer sk-1234'
    --header 'Content-Type: application/json'
    --data '{
        "model": "anthropic.claude-v2",
        "messages": [{"role": "user", "content": "Hey, how'''s it going?"}]
    }'
    ```

    Calculate spend **after** making call:

    ```
    curl --location 'http://localhost:4000/spend/calculate'
    --header 'Authorization: Bearer sk-1234'
    --header 'Content-Type: application/json'
    --data '{
        "completion_response": {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "model": "gpt-3.5-turbo-0125",
            "system_fingerprint": "fp_44709d6fcb",
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello there, how may I assist you today?"
                },
                "logprobs": null,
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 9,
                "completion_tokens": 12,
                "total_tokens": 21
            }
        }
    }'
    ```
    """
    from litellm import completion_cost

    cost_kwargs = await request.json()

    # completion_cost is handed a ModelResponse object rather than the raw
    # dict that arrived in the request body.
    if "completion_response" in cost_kwargs:
        raw_completion = cost_kwargs["completion_response"]
        cost_kwargs["completion_response"] = litellm.ModelResponse(**raw_completion)

    calculated_cost = completion_cost(**cost_kwargs)
    return {"cost": calculated_cost}
@router.get(
    "/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def view_spend_logs(
    api_key: Optional[str] = fastapi.Query(
        default=None,
        description="Get spend logs based on api key",
    ),
    user_id: Optional[str] = fastapi.Query(
        default=None,
        description="Get spend logs based on user_id",
    ),
    request_id: Optional[str] = fastapi.Query(
        default=None,
        description="request_id to get spend logs for specific request_id. If none passed then pass spend logs for all requests",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing key spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view key spend",
    ),
):
    """
    View spend logs, optionally filtered.

    Behavior depends on which parameters are supplied:

    - `start_date` and `end_date` (both `YYYY-MM-DD`): returns one entry per
      day with total spend plus spend broken down per api key, user and
      model. Days after the last day with data, up to `end_date`, are filled
      with zero-spend entries. At most one of `api_key`, `request_id`,
      `user_id` is applied as an extra filter, in that priority order.
    - otherwise `api_key`: all logs for that key (keys starting with `sk-`
      are hashed before lookup).
    - otherwise `request_id`: the single matching log, wrapped in a list.
    - otherwise `user_id`: all logs for that user.
    - no parameters: all spend logs.

    Example Request for all logs
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific request_id
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?request_id=chatcmpl-6dcb2540-d3d7-4e49-bb27-291f863f112e" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific api_key
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?api_key=sk-Fn8Ej39NkBQmUagFEoUWPQ" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific user_id
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?user_id=ishaan@berri.ai" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
        ProxyException: an `HTTPException` is converted (keeping its detail
        and status code), a `ProxyException` is re-raised untouched, and any
        other error (including a missing database connection or a malformed
        date) becomes a 500.
    """
    from litellm.proxy.proxy_server import prisma_client

    try:
        verbose_proxy_logger.debug("inside view_spend_logs")
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        if (
            start_date is not None
            and isinstance(start_date, str)
            and end_date is not None
            and isinstance(end_date, str)
        ):
            # Convert the date strings to datetime objects
            start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
            end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

            filter_query = {
                "startTime": {
                    "gte": start_date_obj,  # Greater than or equal to Start Date
                    "lte": end_date_obj,  # Less than or equal to End Date
                }
            }

            # NOTE(review): unlike the no-date branch below, api_key is not
            # hashed before being used as a filter here - confirm intended.
            if api_key is not None and isinstance(api_key, str):
                filter_query["api_key"] = api_key  # type: ignore
            elif request_id is not None and isinstance(request_id, str):
                filter_query["request_id"] = request_id  # type: ignore
            elif user_id is not None and isinstance(user_id, str):
                filter_query["user"] = user_id  # type: ignore

            # Aggregate spend per (api_key, user, model, startTime).
            response = await prisma_client.db.litellm_spendlogs.group_by(
                by=["api_key", "user", "model", "startTime"],
                where=filter_query,  # type: ignore
                sum={
                    "spend": True,
                },
            )

            if (
                isinstance(response, list)
                and len(response) > 0
                and isinstance(response[0], dict)
            ):
                # Roll the grouped records up into one summary per calendar day.
                result: dict = {}
                for record in response:
                    dt_object = datetime.strptime(
                        str(record["startTime"]), "%Y-%m-%dT%H:%M:%S.%fZ"
                    )  # type: ignore
                    date = dt_object.date()
                    if date not in result:
                        result[date] = {"users": {}, "models": {}}

                    # Distinct names so the request parameters are not shadowed.
                    _record_api_key = record["api_key"]
                    _record_user = record["user"]
                    _record_model = record["model"]
                    _record_spend = record["_sum"]["spend"]

                    result[date]["spend"] = result[date].get("spend", 0) + _record_spend
                    result[date][_record_api_key] = (
                        result[date].get(_record_api_key, 0) + _record_spend
                    )
                    result[date]["users"][_record_user] = (
                        result[date]["users"].get(_record_user, 0) + _record_spend
                    )
                    result[date]["models"][_record_model] = (
                        result[date]["models"].get(_record_model, 0) + _record_spend
                    )

                return_list = []
                final_date = None
                for k, v in sorted(result.items()):
                    return_list.append({**v, "startTime": k})
                    final_date = k

                # Pad with zero-spend days between the last day that has data
                # and end_date, so the series runs through end_date.
                end_date_date = end_date_obj.date()
                if final_date is not None and final_date < end_date_date:
                    current_date = final_date + timedelta(days=1)
                    while current_date <= end_date_date:
                        return_list.append(
                            {
                                "startTime": current_date,
                                "spend": 0,
                                "users": {},
                                "models": {},
                            }
                        )
                        current_date += timedelta(days=1)

                return return_list

            return response

        elif api_key is not None and isinstance(api_key, str):
            # Hash keys that start with "sk-" before lookup; any other value
            # is used for the lookup unchanged.
            if api_key.startswith("sk-"):
                hashed_token = prisma_client.hash_token(token=api_key)
            else:
                hashed_token = api_key
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_all",
                key_val={"key": "api_key", "value": hashed_token},
            )
            if isinstance(spend_log, list):
                return spend_log
            else:
                return [spend_log]
        elif request_id is not None:
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_unique",
                key_val={"key": "request_id", "value": request_id},
            )
            return [spend_log]
        elif user_id is not None:
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_all",
                key_val={"key": "user", "value": user_id},
            )
            if isinstance(spend_log, list):
                return spend_log
            else:
                return [spend_log]
        else:
            spend_logs = await prisma_client.get_data(
                table_name="spend", query_type="find_all"
            )
            # Previously returned the unbound name `spend_log`, so an
            # unfiltered request always failed with a 500.
            return spend_logs
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
                type="internal_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="/spend/logs Error" + str(e),
            type="internal_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.post(
    "/global/spend/reset",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_reset():
    """
    ADMIN ONLY / MASTER KEY Only Endpoint

    Globally reset spend for All API Keys and Teams, maintain LiteLLM_SpendLogs

    1. LiteLLM_SpendLogs will maintain the logs on spend, no data gets deleted from there
    2. LiteLLM_VerificationTokens spend will be set = 0
    3. LiteLLM_TeamTable spend will be set = 0

    Returns:
        dict with a human-readable `message` and a `status` of "success".

    Raises:
        ProxyException: with code 500 when no Prisma client is available.
    """
    # NOTE(review): this function body performs no role check of its own; the
    # admin-only restriction presumably comes from `user_api_key_auth` - confirm.
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        # A missing database client is a server-side problem, not an
        # authentication failure, so report 500 (as /global/spend/logs does)
        # rather than 401.
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # An empty `where` matches every row. The two updates are issued one after
    # the other and are not wrapped in a transaction here.
    await prisma_client.db.litellm_verificationtoken.update_many(
        data={"spend": 0.0}, where={}
    )
    await prisma_client.db.litellm_teamtable.update_many(data={"spend": 0.0}, where={})

    return {
        "message": "Spend for all API Keys and Teams reset successfully",
        "status": "success",
    }
@router.get(
    "/global/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_logs(
    api_key: Optional[str] = fastapi.Query(
        default=None,
        description="API Key to get global spend (spend per day for last 30d). Admin-only endpoint",
    )
):
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to get global spend (spend per day for last 30d). Admin-only endpoint

    More efficient implementation of /spend/logs, by creating a view over the spend logs table.

    Parameters:
    - api_key: optional. When omitted, all rows of the "MonthlyGlobalSpend"
      view are returned; otherwise only the rows of "MonthlyGlobalSpendPerKey"
      whose "api_key" column equals this value. Both are ordered by "date".

    Returns:
        The raw query result, unchanged.

    Raises:
        ProxyException: with code 500 when no Prisma client is available.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    if api_key is None:
        sql_query = """SELECT * FROM "MonthlyGlobalSpend" ORDER BY "date";"""
        return await prisma_client.db.query_raw(query=sql_query)

    # NOTE(review): the key is compared verbatim. /spend/logs hashes keys that
    # start with "sk-" before looking them up - confirm whether callers are
    # expected to pass an already-hashed token here.
    sql_query = """
SELECT * FROM "MonthlyGlobalSpendPerKey"
WHERE "api_key" = $1
ORDER BY "date";
"""
    return await prisma_client.db.query_raw(sql_query, api_key)
@router.get(
    "/global/spend",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend():
    """
    [BETA] This is a beta endpoint. It will change.

    View total spend across all proxy keys

    Returns:
        dict with `spend` (the sum of `spend` over the "MonthlyGlobalSpend"
        view, 0.0 when there is nothing to sum) and `max_budget`
        (the current `litellm.max_budget`).

    Raises:
        HTTPException: 500 when no database is connected.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
    response = await prisma_client.db.query_raw(query=sql_query)

    total_spend = 0.0
    if isinstance(response, list) and response:
        # SUM() over zero rows is NULL, which arrives as None; a default passed
        # to .get() would not apply because the key is present, so fall back
        # to 0.0 explicitly.
        total_spend = response[0].get("total_spend") or 0.0

    return {"spend": total_spend, "max_budget": litellm.max_budget}
@router.get(
    "/global/spend/keys",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_keys(
    limit: Optional[int] = fastapi.Query(
        default=None,
        description="Number of keys to get. Will return Top 'n' keys.",
    )
):
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to get the top 'n' keys with the highest spend, ordered by spend.

    Parameters:
    - limit: optional maximum number of rows. When omitted, every row of the
      "Last30dKeysBySpend" view is returned.

    Returns:
        The raw query result. This query adds no ORDER BY of its own, so the
        ordering is whatever the view provides.

    Raises:
        HTTPException: 500 when no database is connected, 400 when `limit`
        is negative.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if limit is None:
        # Interpolating None would produce the invalid SQL "LIMIT None", so
        # leave the clause out entirely when no limit was requested.
        sql_query = """SELECT * FROM "Last30dKeysBySpend";"""
        return await prisma_client.db.query_raw(query=sql_query)

    if limit < 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "limit must be a non-negative integer"},
        )

    # Bind the value as a query parameter instead of formatting it into the SQL.
    sql_query = """SELECT * FROM "Last30dKeysBySpend" LIMIT $1;"""
    return await prisma_client.db.query_raw(sql_query, limit)
@router.get(
    "/global/spend/teams",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_per_team():
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to get daily spend for the last 30 days, grouped by team alias and date.

    Spend rows whose team has no alias (or no matching team row) are reported
    under the alias "Unassigned".

    Returns:
        dict with
        - `daily_spend`: one entry per date, `{"date": <date>, <alias>: <spend>, ...}`,
          in the order the query returns dates (ascending `spend_date`).
        - `teams`: sorted list of every alias seen.
        - `total_spend_per_team`: at most 10 entries
          `{"team_id": <alias>, "total_spend": <sum>}`, highest spend first.
          Note that the `team_id` key carries the team alias.

    Raises:
        HTTPException: 500 when no database is connected.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    sql_query = """
SELECT
t.team_alias as team_alias,
DATE(s."startTime") AS spend_date,
SUM(s.spend) AS total_spend
FROM
"LiteLLM_SpendLogs" s
LEFT JOIN
"LiteLLM_TeamTable" t ON s.team_id = t.team_id
WHERE
s."startTime" >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
t.team_alias,
DATE(s."startTime")
ORDER BY
spend_date;
"""
    response = await prisma_client.db.query_raw(query=sql_query)

    # transform the response for the Admin UI
    spend_by_date = {}
    team_aliases = set()
    total_spend_per_team = {}

    for row in response or []:
        row_date = row["spend_date"]
        if row_date is None:
            continue

        team_alias = row["team_alias"]
        if team_alias is None:
            team_alias = "Unassigned"
        team_aliases.add(team_alias)

        # SUM() is NULL when every spend in the group is NULL; count it as 0
        # instead of letting round(None, 2) raise.
        spend = round(row["total_spend"] or 0.0, 2)

        spend_by_date.setdefault(row_date, {})[team_alias] = spend
        total_spend_per_team[team_alias] = (
            total_spend_per_team.get(team_alias, 0) + spend
        )

    # Keep only the 10 highest-spending teams for the UI.
    top_teams = sorted(
        total_spend_per_team.items(), key=lambda item: item[1], reverse=True
    )[:10]
    total_spend_per_team_ui = [
        {"team_id": alias, "total_spend": total} for alias, total in top_teams
    ]

    # No re-sorting is needed: dicts preserve insertion order and the query
    # already returns rows ordered by spend_date.
    response_data = [
        {"date": day, **per_team} for day, per_team in spend_by_date.items()
    ]

    return {
        "daily_spend": response_data,
        # sorted so the list order is stable between calls (set order is not)
        "teams": sorted(team_aliases),
        "total_spend_per_team": total_spend_per_team_ui,
    }
@router.get(
    "/global/all_end_users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_view_all_end_users():
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to just get all the unique `end_users`

    Returns:
        `{"end_users": [...]}` holding the distinct `end_user` values found in
        "LiteLLM_SpendLogs". The list is empty when the query returns nothing.

    Raises:
        HTTPException: 500 when no database is connected.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    sql_query = """
SELECT DISTINCT end_user FROM "LiteLLM_SpendLogs"
"""
    db_response = await prisma_client.db.query_raw(query=sql_query)

    # Always answer with the same shape; a missing result is reported as an
    # empty list under the same "end_users" key rather than as a bare list.
    if db_response is None:
        return {"end_users": []}

    return {"end_users": [row["end_user"] for row in db_response]}
@router.post(
    "/global/spend/end_users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
    """
    [BETA] This is a beta endpoint. It will change.

    Gets the top 100 end-users by total spend, optionally for a given api key.

    Parameters:
    - data: optional request body. Its `startTime`, `endTime` and `api_key`
      fields are read. A missing or falsy `startTime` falls back to 30 days
      before now, a missing or falsy `endTime` falls back to now, and when
      `api_key` is None no key filter is applied.

    Returns:
        The raw query result: per `end_user`, `total_count` and `total_spend`
        for logs with `startTime` in [start, end), highest spend first.

    Raises:
        HTTPException: 500 when no database is connected.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    has_body = data is not None
    requested_start = data.startTime if has_body else None
    requested_end = data.endTime if has_body else None
    key_filter = data.api_key if has_body else None

    # Fill in the default 30-day window for whichever bound was not supplied.
    window_start = requested_start or datetime.now() - timedelta(days=30)
    window_end = requested_end or datetime.now()

    sql_query = """
SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend
FROM "LiteLLM_SpendLogs"
WHERE "startTime" >= $1::timestamp
AND "startTime" < $2::timestamp
AND (
CASE
WHEN $3::TEXT IS NULL THEN TRUE
ELSE api_key = $3
END
)
GROUP BY end_user
ORDER BY total_spend DESC
LIMIT 100
"""
    return await prisma_client.db.query_raw(
        sql_query, window_start, window_end, key_filter
    )
@router.get(
    "/global/spend/models",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_models(
    limit: Optional[int] = fastapi.Query(
        default=None,
        description="Number of models to get. Will return Top 'n' models.",
    )
):
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to get the top 'n' models with the highest spend, ordered by spend.

    Parameters:
    - limit: optional maximum number of rows. When omitted, every row of the
      "Last30dModelsBySpend" view is returned.

    Returns:
        The raw query result. This query adds no ORDER BY of its own, so the
        ordering is whatever the view provides.

    Raises:
        HTTPException: 500 when no database is connected, 400 when `limit`
        is negative.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if limit is None:
        # Interpolating None would produce the invalid SQL "LIMIT None", so
        # leave the clause out entirely when no limit was requested.
        sql_query = """SELECT * FROM "Last30dModelsBySpend";"""
        return await prisma_client.db.query_raw(query=sql_query)

    if limit < 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "limit must be a non-negative integer"},
        )

    # Bind the value as a query parameter instead of formatting it into the SQL.
    sql_query = """SELECT * FROM "Last30dModelsBySpend" LIMIT $1;"""
    return await prisma_client.db.query_raw(sql_query, limit)
@router.post(
    "/global/predict/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_predict_spend_logs(request: Request):
    """
    Run the enterprise `_forecast_daily_cost` helper on the request payload.

    Reads the JSON request body, takes its `data` field (None when the key is
    absent) and returns whatever `_forecast_daily_cost` produces for it -
    presumably a daily cost forecast, judging by the helper's name.
    """
    # Imported at call time, so `enterprise` is only needed when this route is hit.
    from enterprise.utils import _forecast_daily_cost

    payload = await request.json()
    forecast_input = payload.get("data")
    return _forecast_daily_cost(forecast_input)