Merge pull request #3619 from BerriAI/litellm_show_spend_reports

[Feat] -  `/global/spend/report`
This commit is contained in:
Ishaan Jaff 2024-05-13 16:06:02 -07:00 committed by GitHub
commit 7b3f695e5d
4 changed files with 310 additions and 23 deletions

View file

@ -1,6 +1,3 @@
<!-- These are just examples. You can remove all items if you want. -->
<!-- Please remove all comments. -->
## Title
<!-- e.g. "Implement user authentication feature" -->
@ -18,7 +15,6 @@
🐛 Bug Fix
🧹 Refactoring
📖 Documentation
💻 Development Environment
🚄 Infrastructure
✅ Test
@ -26,22 +22,8 @@
<!-- List of changes -->
## Testing
## [REQUIRED] Testing - Attach a screenshot of any new tests passing locally
If UI changes, send a screenshot/GIF of working UI fixes
<!-- Test procedure -->
## Notes
<!-- Test results -->
<!-- Points to note for the reviewer, consultation content, concerns -->
## Pre-Submission Checklist (optional but appreciated):
- [ ] I have included relevant documentation updates (stored in /docs/my-website)
## OS Tests (optional but appreciated):
- [ ] Tested on Windows
- [ ] Tested on MacOS
- [ ] Tested on Linux

View file

@ -1,8 +1,125 @@
# Cost Tracking - Azure
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Spend Tracking
## Getting Spend Reports - To Charge Other Teams, API Keys
Use the `/global/spend/report` endpoint to get daily spend per team, with a breakdown of spend per API key and model.
### Example Request
```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2023-04-01&end_date=2024-06-30' \
-H 'Authorization: Bearer sk-1234'
```
### Example Response
<Tabs>
<TabItem value="response" label="Expected Response">
```json
[
{
"group_by_day": "2024-04-30T00:00:00+00:00",
"teams": [
{
"team_name": "Prod Team",
"total_spend": 0.0015265,
"metadata": [
{
"model": "gpt-4",
"spend": 0.00123,
"total_tokens": 28
},
{
"model": "chatgpt-v-2",
"spend": 0.000214,
"total_tokens": 122
},
{
"model": "gpt-3.5-turbo",
"spend": 0.0000825,
"total_tokens": 85
}
]
}
]
}
]
```
</TabItem>
<TabItem value="py-script" label="Script to Parse Response (Python)">
```python
import requests

# Example client: fetch the daily spend report and print one entry per
# (day, team) pair.
url = 'http://localhost:4000/global/spend/report'
params = {
    'start_date': '2023-04-01',
    'end_date': '2024-06-30'
}

headers = {
    'Authorization': 'Bearer sk-1234'
}

# Make the GET request
response = requests.get(url, headers=headers, params=params)
# Fail loudly on a non-2xx reply instead of trying to iterate an error
# payload as if it were the list of report rows.
response.raise_for_status()
spend_report = response.json()

for row in spend_report:
    date = row["group_by_day"]
    teams = row["teams"]
    for team in teams:
        team_name = team["team_name"]
        total_spend = team["total_spend"]
        metadata = team["metadata"]

        print(f"Date: {date}")
        print(f"Team: {team_name}")
        print(f"Total Spend: {total_spend}")
        print("Metadata: ", metadata)
        print()
```
Output from script
```shell
# Date: 2024-05-11T00:00:00+00:00
# Team: local_test_team
# Total Spend: 0.003675099999999999
# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.003675099999999999, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 3105}]
# Date: 2024-05-13T00:00:00+00:00
# Team: Unassigned Team
# Total Spend: 3.4e-05
# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 3.4e-05, 'api_key': '9569d13c9777dba68096dea49b0b03e0aaf4d2b65d4030eda9e8a2733c3cd6e0', 'total_tokens': 50}]
# Date: 2024-05-13T00:00:00+00:00
# Team: central
# Total Spend: 0.000684
# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.000684, 'api_key': '0323facdf3af551594017b9ef162434a9b9a8ca1bbd9ccbd9d6ce173b1015605', 'total_tokens': 498}]
# Date: 2024-05-13T00:00:00+00:00
# Team: local_test_team
# Total Spend: 0.0005715000000000001
# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.0005715000000000001, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 423}]
```
</TabItem>
</Tabs>
## Spend Tracking for Azure
Set the base model so that the cost of Azure image-generation calls is tracked correctly.
## Image Generation
### Image Generation
```yaml
model_list:
@ -17,7 +134,7 @@ model_list:
mode: image_generation
```
## Chat Completions / Embeddings
### Chat Completions / Embeddings
**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking

View file

@ -5384,6 +5384,141 @@ async def view_spend_tags(
)
@router.get(
    "/global/spend/report",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_spend_report(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get daily spend per team between start_date and end_date, with a
    per-team breakdown by model and API key.

    Parameters:
    - start_date: start of the reporting window, formatted as YYYY-MM-DD
    - end_date: end of the reporting window, formatted as YYYY-MM-DD

    Returns the rows produced by the SQL query below, ordered by day:

    [
        {
            "group_by_day": <day, truncated from the spend log startTime>,
            "teams": [
                {
                    "team_name": "team-1",  # "Unassigned Team" when no team alias is found
                    "total_spend": 10,
                    "metadata": [
                        {
                            "model": "model-1",
                            "api_key": "1213",
                            "spend": 12.50,
                            "total_tokens": 6000
                        }
                    ]
                }
            ]
        }
    ]

    Raises HTTPException (400) when a date is missing or malformed, when no
    database is connected, or when the query fails.
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # Reject malformed dates with a 400 rather than letting strptime's
    # ValueError escape as an unhandled server error.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": "Invalid date format. Please provide start_date and end_date as YYYY-MM-DD"
            },
        ) from e

    global prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # first get data from spend logs -> SpendByModelApiKey
        # then read data from "SpendByModelApiKey" to format the response obj
        # NOTE(review): both bounds are cast to ::date, so end_date is compared
        # as a date rather than a full timestamp - confirm whether spend logged
        # later on end_date itself is meant to be included.
        sql_query = """
        WITH SpendByModelApiKey AS (
            SELECT
                date_trunc('day', sl."startTime") AS group_by_day,
                COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
                sl.model,
                sl.api_key,
                SUM(sl.spend) AS model_api_spend,
                SUM(sl.total_tokens) AS model_api_tokens
            FROM
                "LiteLLM_SpendLogs" sl
            LEFT JOIN
                "LiteLLM_TeamTable" tt
            ON
                sl.team_id = tt.team_id
            WHERE
                sl."startTime" BETWEEN $1::date AND $2::date
            GROUP BY
                date_trunc('day', sl."startTime"),
                tt.team_alias,
                sl.model,
                sl.api_key
        )
        SELECT
            group_by_day,
            jsonb_agg(jsonb_build_object(
                'team_name', team_name,
                'total_spend', total_spend,
                'metadata', metadata
            )) AS teams
        FROM (
            SELECT
                group_by_day,
                team_name,
                SUM(model_api_spend) AS total_spend,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'api_key', api_key,
                    'spend', model_api_spend,
                    'total_tokens', model_api_tokens
                )) AS metadata
            FROM
                SpendByModelApiKey
            GROUP BY
                group_by_day,
                team_name
        ) AS aggregated
        GROUP BY
            group_by_day
        ORDER BY
            group_by_day;
        """

        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        return db_response

    except Exception as e:
        # Any failure (including the missing-database case above) is reported
        # to the caller as a 400 carrying the error text.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
@router.get(
"/global/spend/tags",
tags=["Budget & Spend Tracking"],

View file

@ -138,6 +138,23 @@ async def get_predict_spend_logs(session):
return await response.json()
async def get_spend_report(session, start_date, end_date):
    """
    Call GET /global/spend/report on the local proxy and return the parsed
    JSON body.

    The raw response text is printed for debugging. Raises Exception when
    the response status is not 200.
    """
    endpoint = "http://0.0.0.0:4000/global/spend/report"
    request_headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }
    query_params = {"start_date": start_date, "end_date": end_date}

    async with session.get(
        endpoint, headers=request_headers, params=query_params
    ) as resp:
        status = resp.status
        raw_body = await resp.text()

        print(raw_body)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

        return await resp.json()
@pytest.mark.asyncio
async def test_get_predicted_spend_logs():
"""
@ -205,3 +222,39 @@ async def test_spend_logs_high_traffic():
except:
print(n, time.time() - start, 0)
raise Exception("it worked!")
@pytest.mark.asyncio
async def test_spend_report_endpoint():
    """
    Request the spend report for a window from 30 days ago through tomorrow
    and check that every returned team entry carries a team name.
    """
    import datetime

    client_timeout = aiohttp.ClientTimeout(total=600)
    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        # The window's end is one day past today's date.
        window_end = (
            datetime.date.today() + datetime.timedelta(days=1)
        ).strftime("%Y-%m-%d")
        print("todays_date", window_end)

        window_start = datetime.date.today() - datetime.timedelta(days=30)
        window_start = window_start.strftime("%Y-%m-%d")

        spend_report = await get_spend_report(
            session=session, start_date=window_start, end_date=window_end
        )
        print("spend report", spend_report)

        for day_entry in spend_report:
            day = day_entry["group_by_day"]
            for team_entry in day_entry["teams"]:
                name = team_entry["team_name"]
                spend = team_entry["total_spend"]
                breakdown = team_entry["metadata"]

                assert name is not None

                print(f"Date: {day}")
                print(f"Team: {name}")
                print(f"Total Spend: {spend}")
                print("Metadata: ", breakdown)
                print()