Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Merge pull request #3619 from BerriAI/litellm_show_spend_reports

[Feat] - `/global/spend/report`

Commit 7b3f695e5d: 4 changed files with 310 additions and 23 deletions.
File: .github/pull_request_template.md (vendored, 22 changes)
```diff
@@ -1,6 +1,3 @@
-<!-- This is just examples. You can remove all items if you want. -->
-<!-- Please remove all comments. -->
-
 ## Title
 
 <!-- e.g. "Implement user authentication feature" -->
@@ -18,7 +15,6 @@
 🐛 Bug Fix
 🧹 Refactoring
 📖 Documentation
-💻 Development Environment
 🚄 Infrastructure
 ✅ Test
 
@@ -26,22 +22,8 @@
 
 <!-- List of changes -->
 
-## Testing
+## [REQUIRED] Testing - Attach a screenshot of any new tests passing locall
+If UI changes, send a screenshot/GIF of working UI fixes
 
-<!-- Test procedure -->
-
-## Notes
-
-<!-- Test results -->
-
-<!-- Points to note for the reviewer, consultation content, concerns -->
-
-## Pre-Submission Checklist (optional but appreciated):
-
-- [ ] I have included relevant documentation updates (stored in /docs/my-website)
-
-## OS Tests (optional but appreciated):
-
-- [ ] Tested on Windows
-- [ ] Tested on MacOS
-- [ ] Tested on Linux
```
File: docs (proxy spend tracking page)

````diff
@@ -1,8 +1,125 @@
-# Cost Tracking - Azure
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Spend Tracking
+
+## Getting Spend Reports - To Charge Other Teams, API Keys
+
+Use the `/global/spend/report` endpoint to get daily spend per team, with a breakdown of spend per API Key, Model
+
+### Example Request
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2023-04-01&end_date=2024-06-30' \
+  -H 'Authorization: Bearer sk-1234'
+```
+
+### Example Response
+
+<Tabs>
+
+<TabItem value="response" label="Expected Response">
+
+```shell
+[
+    {
+        "group_by_day": "2024-04-30T00:00:00+00:00",
+        "teams": [
+            {
+                "team_name": "Prod Team",
+                "total_spend": 0.0015265,
+                "metadata": [
+                    {
+                        "model": "gpt-4",
+                        "spend": 0.00123,
+                        "total_tokens": 28
+                    },
+                    {
+                        "model": "chatgpt-v-2",
+                        "spend": 0.000214,
+                        "total_tokens": 122
+                    },
+                    {
+                        "model": "gpt-3.5-turbo",
+                        "spend": 0.0000825,
+                        "total_tokens": 85
+                    }
+                ]
+            }
+        ]
+    }
+]
+```
+
+</TabItem>
+
+<TabItem value="py-script" label="Script to Parse Response (Python)">
+
+```python
+import requests
+url = 'http://localhost:4000/global/spend/report'
+params = {
+    'start_date': '2023-04-01',
+    'end_date': '2024-06-30'
+}
+
+headers = {
+    'Authorization': 'Bearer sk-1234'
+}
+
+# Make the GET request
+response = requests.get(url, headers=headers, params=params)
+spend_report = response.json()
+
+for row in spend_report:
+    date = row["group_by_day"]
+    teams = row["teams"]
+    for team in teams:
+        team_name = team["team_name"]
+        total_spend = team["total_spend"]
+        metadata = team["metadata"]
+
+        print(f"Date: {date}")
+        print(f"Team: {team_name}")
+        print(f"Total Spend: {total_spend}")
+        print("Metadata: ", metadata)
+        print()
+```
+
+Output from script
+```shell
+# Date: 2024-05-11T00:00:00+00:00
+# Team: local_test_team
+# Total Spend: 0.003675099999999999
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.003675099999999999, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 3105}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: Unassigned Team
+# Total Spend: 3.4e-05
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 3.4e-05, 'api_key': '9569d13c9777dba68096dea49b0b03e0aaf4d2b65d4030eda9e8a2733c3cd6e0', 'total_tokens': 50}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: central
+# Total Spend: 0.000684
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.000684, 'api_key': '0323facdf3af551594017b9ef162434a9b9a8ca1bbd9ccbd9d6ce173b1015605', 'total_tokens': 498}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: local_test_team
+# Total Spend: 0.0005715000000000001
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.0005715000000000001, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 423}]
+```
+
+</TabItem>
+
+</Tabs>
+
+## Spend Tracking for Azure
 
 Set base model for cost tracking azure image-gen call
 
-## Image Generation
+### Image Generation
 
 ```yaml
 model_list:
@@ -17,7 +134,7 @@ model_list:
       mode: image_generation
 ```
 
-## Chat Completions / Embeddings
+### Chat Completions / Embeddings
 
 **Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking
````
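A note on using the new report for chargeback: each response row is one day, so charging a team for a whole billing window means summing its `total_spend` across rows. A minimal sketch, using the same placeholder proxy URL and master key as the docs example above (illustrative only, not part of the PR):

```python
import requests
from collections import defaultdict

# Same placeholder proxy URL and master key as the docs example above.
url = "http://localhost:4000/global/spend/report"
params = {"start_date": "2023-04-01", "end_date": "2024-06-30"}
headers = {"Authorization": "Bearer sk-1234"}

response = requests.get(url, headers=headers, params=params)
response.raise_for_status()

# Each row is one day; sum every team's daily totals across the window.
totals = defaultdict(float)
for row in response.json():
    for team in row["teams"]:
        totals[team["team_name"]] += team["total_spend"]

for team_name, spend in sorted(totals.items(), key=lambda kv: -kv[1]):
    print(f"{team_name}: ${spend:.6f}")
```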
File: litellm/proxy/proxy_server.py

```diff
@@ -5384,6 +5384,141 @@ async def view_spend_tags(
     )
 
 
+@router.get(
+    "/global/spend/report",
+    tags=["Budget & Spend Tracking"],
+    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
+    responses={
+        200: {"model": List[LiteLLM_SpendLogs]},
+    },
+)
+async def get_global_spend_report(
+    start_date: Optional[str] = fastapi.Query(
+        default=None,
+        description="Time from which to start viewing spend",
+    ),
+    end_date: Optional[str] = fastapi.Query(
+        default=None,
+        description="Time till which to view spend",
+    ),
+):
+    """
+    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
+    [
+        {
+            "group-by-day": "2024-05-10",
+            "teams": [
+                {
+                    "team_name": "team-1",
+                    "spend": 10,
+                    "keys": [
+                        {
+                            "key": "1213",
+                            "usage": {
+                                "model-1": {
+                                    "cost": 12.50,
+                                    "input_tokens": 1000,
+                                    "output_tokens": 5000,
+                                    "requests": 100
+                                },
+                                "audio-modelname1": {
+                                    "cost": 25.50,
+                                    "seconds": 25,
+                                    "requests": 50
+                                }
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+    """
+    if start_date is None or end_date is None:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={"error": "Please provide start_date and end_date"},
+        )
+
+    start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
+    end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
+
+    global prisma_client
+    try:
+        if prisma_client is None:
+            raise Exception(
+                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
+            )
+
+        # first get data from spend logs -> SpendByModelApiKey
+        # then read data from "SpendByModelApiKey" to format the response obj
+        sql_query = """
+        WITH SpendByModelApiKey AS (
+            SELECT
+                date_trunc('day', sl."startTime") AS group_by_day,
+                COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
+                sl.model,
+                sl.api_key,
+                SUM(sl.spend) AS model_api_spend,
+                SUM(sl.total_tokens) AS model_api_tokens
+            FROM
+                "LiteLLM_SpendLogs" sl
+            LEFT JOIN
+                "LiteLLM_TeamTable" tt
+            ON
+                sl.team_id = tt.team_id
+            WHERE
+                sl."startTime" BETWEEN $1::date AND $2::date
+            GROUP BY
+                date_trunc('day', sl."startTime"),
+                tt.team_alias,
+                sl.model,
+                sl.api_key
+        )
+        SELECT
+            group_by_day,
+            jsonb_agg(jsonb_build_object(
+                'team_name', team_name,
+                'total_spend', total_spend,
+                'metadata', metadata
+            )) AS teams
+        FROM (
+            SELECT
+                group_by_day,
+                team_name,
+                SUM(model_api_spend) AS total_spend,
+                jsonb_agg(jsonb_build_object(
+                    'model', model,
+                    'api_key', api_key,
+                    'spend', model_api_spend,
+                    'total_tokens', model_api_tokens
+                )) AS metadata
+            FROM
+                SpendByModelApiKey
+            GROUP BY
+                group_by_day,
+                team_name
+        ) AS aggregated
+        GROUP BY
+            group_by_day
+        ORDER BY
+            group_by_day;
+        """
+
+        db_response = await prisma_client.db.query_raw(
+            sql_query, start_date_obj, end_date_obj
+        )
+        if db_response is None:
+            return []
+
+        return db_response
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={"error": str(e)},
+        )
+
+
 @router.get(
     "/global/spend/tags",
     tags=["Budget & Spend Tracking"],
```
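The query above aggregates in two passes: the `SpendByModelApiKey` CTE sums spend and tokens per (day, team, model, api_key), then the outer query folds those rows into a per-team `total_spend` plus a `metadata` array, and finally into one `teams` array per day. For readers who prefer code to SQL, here is a rough in-memory equivalent of the roll-up, with hypothetical sample rows shaped like the CTE output (illustrative only, not part of the PR):

```python
from collections import defaultdict

# Hypothetical sample rows, shaped like the SpendByModelApiKey CTE output.
rows = [
    {"group_by_day": "2024-05-13", "team_name": "central", "model": "gpt-4",
     "api_key": "hash-1", "model_api_spend": 0.00123, "model_api_tokens": 28},
    {"group_by_day": "2024-05-13", "team_name": "central", "model": "gpt-3.5-turbo",
     "api_key": "hash-2", "model_api_spend": 0.000684, "model_api_tokens": 498},
]

# Pass 1 (inner SELECT): per (day, team), total spend plus per-model/key metadata.
per_team = defaultdict(lambda: {"total_spend": 0.0, "metadata": []})
for r in rows:
    entry = per_team[(r["group_by_day"], r["team_name"])]
    entry["total_spend"] += r["model_api_spend"]
    entry["metadata"].append({
        "model": r["model"], "api_key": r["api_key"],
        "spend": r["model_api_spend"], "total_tokens": r["model_api_tokens"],
    })

# Pass 2 (outer SELECT): per day, a single `teams` array, ordered by day.
report = defaultdict(list)
for (day, team_name), entry in per_team.items():
    report[day].append({"team_name": team_name, **entry})

for day in sorted(report):
    print({"group_by_day": day, "teams": report[day]})
```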
File: tests (proxy spend logs e2e suite)

```diff
@@ -138,6 +138,23 @@ async def get_predict_spend_logs(session):
     return await response.json()
 
 
+async def get_spend_report(session, start_date, end_date):
+    url = "http://0.0.0.0:4000/global/spend/report"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    async with session.get(
+        url, headers=headers, params={"start_date": start_date, "end_date": end_date}
+    ) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+        return await response.json()
+
+
 @pytest.mark.asyncio
 async def test_get_predicted_spend_logs():
     """
@@ -205,3 +222,39 @@ async def test_spend_logs_high_traffic():
     except:
         print(n, time.time() - start, 0)
         raise Exception("it worked!")
+
+
+@pytest.mark.asyncio
+async def test_spend_report_endpoint():
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=600)
+    ) as session:
+        import datetime
+
+        todays_date = datetime.date.today() + datetime.timedelta(days=1)
+        todays_date = todays_date.strftime("%Y-%m-%d")
+
+        print("todays_date", todays_date)
+        thirty_days_ago = (
+            datetime.date.today() - datetime.timedelta(days=30)
+        ).strftime("%Y-%m-%d")
+        spend_report = await get_spend_report(
+            session=session, start_date=thirty_days_ago, end_date=todays_date
+        )
+        print("spend report", spend_report)
+
+        for row in spend_report:
+            date = row["group_by_day"]
+            teams = row["teams"]
+            for team in teams:
+                team_name = team["team_name"]
+                total_spend = team["total_spend"]
+                metadata = team["metadata"]
+
+                assert team_name is not None
+
+                print(f"Date: {date}")
+                print(f"Team: {team_name}")
+                print(f"Total Spend: {total_spend}")
+                print("Metadata: ", metadata)
+                print()
```
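A note on the test's date range: `end_date` is set to tomorrow because the endpoint filters with `sl."startTime" BETWEEN $1::date AND $2::date`, which compares timestamps against midnight of the end date; an `end_date` of today would drop any logs written later today. To run just this test against a locally running proxy, something like the following should work (a sketch, assuming the proxy is on `0.0.0.0:4000` with master key `sk-1234`, as the helper above hardcodes):

```python
# Run only the new spend-report test, printing its output.
# Equivalent to `pytest -s -k test_spend_report_endpoint` in the tests directory.
import pytest

pytest.main(["-s", "-k", "test_spend_report_endpoint"])
```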