diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 51c5789717..b7a1643686 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,6 +1,3 @@
-
-
-
 ## Title
 
 
@@ -18,7 +15,6 @@
 🐛 Bug Fix
 🧹 Refactoring
 📖 Documentation
-💻 Development Environment
 🚄 Infrastructure
 ✅ Test
 
@@ -26,22 +22,8 @@
 
-## Testing
+## [REQUIRED] Testing - Attach a screenshot of any new tests passing locall
 
+If UI changes, send a screenshot/GIF of working UI fixes
 
-## Notes
-
-
-
-
-
-## Pre-Submission Checklist (optional but appreciated):
-
-- [ ] I have included relevant documentation updates (stored in /docs/my-website)
-
-## OS Tests (optional but appreciated):
-
-- [ ] Tested on Windows
-- [ ] Tested on MacOS
-- [ ] Tested on Linux
diff --git a/docs/my-website/docs/proxy/cost_tracking.md b/docs/my-website/docs/proxy/cost_tracking.md
index 887ec9e3ed..56586361f3 100644
--- a/docs/my-website/docs/proxy/cost_tracking.md
+++ b/docs/my-website/docs/proxy/cost_tracking.md
@@ -1,8 +1,125 @@
-# Cost Tracking - Azure
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Spend Tracking
+
+## Getting Spend Reports - To Charge Other Teams, API Keys
+
+Use the `/global/spend/report` endpoint to get daily spend per team, with a breakdown of spend per API Key, Model
+
+### Example Request
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2023-04-01&end_date=2024-06-30' \
+  -H 'Authorization: Bearer sk-1234'
+```
+
+### Example Response
+
+
+```shell
+[
+    {
+        "group_by_day": "2024-04-30T00:00:00+00:00",
+        "teams": [
+            {
+                "team_name": "Prod Team",
+                "total_spend": 0.0015265,
+                "metadata": [
+                    {
+                        "model": "gpt-4",
+                        "spend": 0.00123,
+                        "total_tokens": 28
+                    },
+                    {
+                        "model": "chatgpt-v-2",
+                        "spend": 0.000214,
+                        "total_tokens": 122
+                    },
+                    {
+                        "model": "gpt-3.5-turbo",
+                        "spend": 0.0000825,
+                        "total_tokens": 85
+                    }
+                ]
+            }
+        ]
+    }
+]
+```
+
+
+```python
+import requests
+url = 'http://localhost:4000/global/spend/report'
+params = {
+    'start_date': '2023-04-01',
+    'end_date': '2024-06-30'
+}
+
+headers = {
+    'Authorization': 'Bearer sk-1234'
+}
+
+# Make the GET request
+response = requests.get(url, headers=headers, params=params)
+spend_report = response.json()
+
+for row in spend_report:
+    date = row["group_by_day"]
+    teams = row["teams"]
+    for team in teams:
+        team_name = team["team_name"]
+        total_spend = team["total_spend"]
+        metadata = team["metadata"]
+
+        print(f"Date: {date}")
+        print(f"Team: {team_name}")
+        print(f"Total Spend: {total_spend}")
+        print("Metadata: ", metadata)
+        print()
+```
+
+Output from script
+```shell
+# Date: 2024-05-11T00:00:00+00:00
+# Team: local_test_team
+# Total Spend: 0.003675099999999999
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.003675099999999999, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 3105}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: Unassigned Team
+# Total Spend: 3.4e-05
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 3.4e-05, 'api_key': '9569d13c9777dba68096dea49b0b03e0aaf4d2b65d4030eda9e8a2733c3cd6e0', 'total_tokens': 50}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: central
+# Total Spend: 0.000684
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.000684, 'api_key': '0323facdf3af551594017b9ef162434a9b9a8ca1bbd9ccbd9d6ce173b1015605', 'total_tokens': 498}]
+
+# Date: 2024-05-13T00:00:00+00:00
+# Team: local_test_team
+# Total Spend: 0.0005715000000000001
+# Metadata: [{'model': 'gpt-3.5-turbo', 'spend': 0.0005715000000000001, 'api_key': 'b94d5e0bc3a71a573917fe1335dc0c14728c7016337451af9714924ff3a729db', 'total_tokens': 423}]
+```
+
+
+## Spend Tracking for Azure
 
 Set base model for cost tracking azure image-gen call
 
-## Image Generation
+### Image Generation
 
 ```yaml
 model_list: 
@@ -17,7 +134,7 @@ model_list:
       mode: image_generation 
 ```
 
-## Chat Completions / Embeddings
+### Chat Completions / Embeddings
 
 **Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 92ae01c25f..ec708d5f2b 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -5384,6 +5384,141 @@ async def view_spend_tags(
     )
 
 
+@router.get(
+    "/global/spend/report",
+    tags=["Budget & Spend Tracking"],
+    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
+    responses={
+        200: {"model": List[LiteLLM_SpendLogs]},
+    },
+)
+async def get_global_spend_report(
+    start_date: Optional[str] = fastapi.Query(
+        default=None,
+        description="Time from which to start viewing spend",
+    ),
+    end_date: Optional[str] = fastapi.Query(
+        default=None,
+        description="Time till which to view spend",
+    ),
+):
+    """
+    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
+    [
+        {
+            "group-by-day": "2024-05-10",
+            "teams": [
+                {
+                    "team_name": "team-1"
+                    "spend": 10,
+                    "keys": [
+                        "key": "1213",
+                        "usage": {
+                            "model-1": {
+                                "cost": 12.50,
+                                "input_tokens": 1000,
+                                "output_tokens": 5000,
+                                "requests": 100
+                            },
+                            "audio-modelname1": {
+                                "cost": 25.50,
+                                "seconds": 25,
+                                "requests": 50
+                            },
+                        }
+                    }
+                ]
+            ]
+        }
+    """
+    if start_date is None or end_date is None:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={"error": "Please provide start_date and end_date"},
+        )
+
+    start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
+    end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
+
+    global prisma_client
+    try:
+        if prisma_client is None:
+            raise Exception(
+                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
+            )
+
+        # first get data from spend logs -> SpendByModelApiKey
+        # then read data from "SpendByModelApiKey" to format the response obj
+        sql_query = """
+
+        WITH SpendByModelApiKey AS (
+            SELECT
+                date_trunc('day', sl."startTime") AS group_by_day,
+                COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
+                sl.model,
+                sl.api_key,
+                SUM(sl.spend) AS model_api_spend,
+                SUM(sl.total_tokens) AS model_api_tokens
+            FROM
+                "LiteLLM_SpendLogs" sl
+            LEFT JOIN
+                "LiteLLM_TeamTable" tt
+            ON
+                sl.team_id = tt.team_id
+            WHERE
+                sl."startTime" BETWEEN $1::date AND $2::date
+            GROUP BY
+                date_trunc('day', sl."startTime"),
+                tt.team_alias,
+                sl.model,
+                sl.api_key
+        )
+            SELECT
+                group_by_day,
+                jsonb_agg(jsonb_build_object(
+                    'team_name', team_name,
+                    'total_spend', total_spend,
+                    'metadata', metadata
+                )) AS teams
+            FROM (
+                SELECT
+                    group_by_day,
+                    team_name,
+                    SUM(model_api_spend) AS total_spend,
+                    jsonb_agg(jsonb_build_object(
+                        'model', model,
+                        'api_key', api_key,
+                        'spend', model_api_spend,
+                        'total_tokens', model_api_tokens
+                    )) AS metadata
+                FROM
+                    SpendByModelApiKey
+                GROUP BY
+                    group_by_day,
+                    team_name
+            ) AS aggregated
+            GROUP BY
+                group_by_day
+            ORDER BY
+                group_by_day;
+        """
+
+        db_response = await prisma_client.db.query_raw(
+            sql_query, start_date_obj, end_date_obj
+        )
+        if db_response is None:
+            return []
+
+        return db_response
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={"error": str(e)},
+        )
+
+
 @router.get(
     "/global/spend/tags",
     tags=["Budget & Spend Tracking"],
diff --git a/tests/test_spend_logs.py b/tests/test_spend_logs.py
index 477fdb86f0..1a3373c9d7 100644
--- a/tests/test_spend_logs.py
+++ b/tests/test_spend_logs.py
@@ -138,6 +138,23 @@ async def get_predict_spend_logs(session):
     return await response.json()
 
 
+async def get_spend_report(session, start_date, end_date):
+    url = "http://0.0.0.0:4000/global/spend/report"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    async with session.get(
+        url, headers=headers, params={"start_date": start_date, "end_date": end_date}
+    ) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+        return await response.json()
+
+
 @pytest.mark.asyncio
 async def test_get_predicted_spend_logs():
     """
@@ -205,3 +222,39 @@ async def test_spend_logs_high_traffic():
         except:
             print(n, time.time() - start, 0)
             raise Exception("it worked!")
+
+
+@pytest.mark.asyncio
+async def test_spend_report_endpoint():
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=600)
+    ) as session:
+        import datetime
+
+        todays_date = datetime.date.today() + datetime.timedelta(days=1)
+        todays_date = todays_date.strftime("%Y-%m-%d")
+
+        print("todays_date", todays_date)
+        thirty_days_ago = (
+            datetime.date.today() - datetime.timedelta(days=30)
+        ).strftime("%Y-%m-%d")
+        spend_report = await get_spend_report(
+            session=session, start_date=thirty_days_ago, end_date=todays_date
+        )
+        print("spend report", spend_report)
+
+        for row in spend_report:
+            date = row["group_by_day"]
+            teams = row["teams"]
+            for team in teams:
+                team_name = team["team_name"]
+                total_spend = team["total_spend"]
+                metadata = team["metadata"]
+
+                assert team_name is not None
+
+                print(f"Date: {date}")
+                print(f"Team: {team_name}")
print(f"Total Spend: {total_spend}") + print("Metadata: ", metadata) + print()