mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
fix(proxy_server.py): introduce a beta endpoint for admin to view global spend
This commit is contained in:
parent
a042092faa
commit
6a94ef6c16
4 changed files with 183 additions and 19 deletions
|
@ -3788,7 +3788,7 @@ async def view_spend_tags(
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/spend/logs",
|
"/spend/logs",
|
||||||
tags=["budget & spend Tracking"],
|
tags=["Budget & Spend Tracking"],
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
responses={
|
responses={
|
||||||
200: {"model": List[LiteLLM_SpendLogs]},
|
200: {"model": List[LiteLLM_SpendLogs]},
|
||||||
|
@ -4048,6 +4048,28 @@ async def view_spend_logs(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
    "/global/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_logs():
    """
    [BETA] This is a beta endpoint.

    Use this to get global spend (spend per day for last 30d). Admin-only endpoint

    More efficient implementation of /spend/logs, by creating a view over the spend logs table.

    Returns:
        The raw rows of the "globalspendperdate" DB view (list of dicts).

    Raises:
        Exception: if the proxy was started without a database connection.
    """
    global prisma_client
    # Fail fast with an actionable error instead of an opaque AttributeError
    # when no database is configured.
    if prisma_client is None:
        raise Exception(
            "Database not connected. Connect a database to your proxy to use /global/spend/logs"
        )

    # "globalspendperdate" is a DB view that pre-aggregates spend per day, so
    # this is a single cheap SELECT instead of scanning the raw spend-logs table.
    sql_query = """SELECT * FROM "globalspendperdate";"""

    response = await prisma_client.db.query_raw(query=sql_query)

    return response
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/daily_metrics",
|
"/daily_metrics",
|
||||||
summary="Get daily spend metrics",
|
summary="Get daily spend metrics",
|
||||||
|
|
|
@ -80,6 +80,14 @@ request_data = {
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def prisma_client():
|
def prisma_client():
|
||||||
|
from litellm.proxy.proxy_cli import append_query_params
|
||||||
|
|
||||||
|
### add connection pool + pool timeout args
|
||||||
|
params = {"connection_limit": 100, "pool_timeout": 60}
|
||||||
|
database_url = os.getenv("DATABASE_URL")
|
||||||
|
modified_url = append_query_params(database_url, params)
|
||||||
|
os.environ["DATABASE_URL"] = modified_url
|
||||||
|
|
||||||
# Assuming DBClient is a class that needs to be instantiated
|
# Assuming DBClient is a class that needs to be instantiated
|
||||||
prisma_client = PrismaClient(
|
prisma_client = PrismaClient(
|
||||||
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
|
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
|
||||||
|
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Got Exception", e)
|
print("Got Exception", e)
|
||||||
print(e.message)
|
print(e.message)
|
||||||
|
|
||||||
|
|
||||||
|
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
    """Fire one _PROXY_track_cost_callback call with a canned streaming response.

    Builds a fixed fake completion (210 prompt / 200 completion tokens) and
    invokes the proxy's cost-tracking callback as if a streamed request for
    *generated_key* / *user_id* had just finished, so spend gets recorded.
    """
    import uuid

    from litellm import ModelResponse, Choices, Message, Usage
    from litellm.proxy.proxy_server import (
        _PROXY_track_cost_callback as track_cost_callback,
    )

    request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"

    # Canned assistant reply wrapped in the single choice the proxy expects.
    assistant_message = Message(
        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
        role="assistant",
    )
    fake_completion = ModelResponse(
        id=request_id,
        choices=[
            Choices(
                finish_reason=None,
                index=0,
                message=assistant_message,
            )
        ],
        model="gpt-35-turbo",  # azure always has model written like this
        usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
    )

    # kwargs mirror what litellm passes for a finished streaming call; the key
    # metadata is what ties the spend back to the generated key / user.
    callback_kwargs = {
        "call_type": "acompletion",
        "model": "sagemaker-chatgpt-v-2",
        "stream": True,
        "complete_streaming_response": fake_completion,
        "litellm_params": {
            "metadata": {
                "user_api_key": hash_token(generated_key),
                "user_api_key_user_id": user_id,
            }
        },
        "response_cost": 0.00005,
    }

    await track_cost_callback(
        kwargs=callback_kwargs,
        completion_response=fake_completion,
        start_time=datetime.now(),
        end_time=datetime.now(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# @pytest.mark.skip(reason="High traffic load test for spend tracking")
@pytest.mark.asyncio
async def test_proxy_load_test_db(prisma_client):
    """
    Run 1500 req./s against track_cost_callback function
    """
    # Wire the fixture-provided DB client and master key into the proxy module
    # globals that the auth / cost-tracking code paths read.
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    from litellm._logging import verbose_proxy_logger
    import logging, time

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        start_time = time.time()
        await litellm.proxy.proxy_server.prisma_client.connect()
        # Tiny budget: spend tracking should eventually exhaust it.
        request = GenerateKeyRequest(max_budget=0.00001)
        key = await generate_key_fn(request)
        print(key)

        generated_key = key.key
        user_id = key.user_id
        bearer_token = "Bearer " + generated_key

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
        # update spend using track_cost callback, make 2nd request, it should fail
        n = 5000
        # Fan out n concurrent cost-tracking writes against the DB.
        tasks = [
            track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
            for _ in range(n)
        ]
        completions = await asyncio.gather(*tasks)
        # Give the async spend writer time to flush all rows before reading back.
        await asyncio.sleep(120)
        try:
            # call spend logs
            spend_logs = await view_spend_logs(api_key=generated_key)

            print(f"len responses: {len(spend_logs)}")
            assert len(spend_logs) == n
            print(n, time.time() - start_time, len(spend_logs))
        except:
            # Read-back failed or count mismatched; log a zero-count timing line.
            print(n, time.time() - start_time, 0)
        # NOTE(review): intentional — the test always raises here so pytest
        # surfaces the printed timings and the generated key in its output.
        raise Exception(f"it worked! key={key.key}")
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
||||||
|
|
|
@ -313,6 +313,11 @@ export const userSpendLogsCall = async (
|
||||||
endTime: String
|
endTime: String
|
||||||
) => {
|
) => {
|
||||||
try {
|
try {
|
||||||
|
console.log(`user role in spend logs call: ${userRole}`);
|
||||||
|
if (userRole == "Admin") {
|
||||||
|
return await adminSpendLogsCall(accessToken);
|
||||||
|
}
|
||||||
|
|
||||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
|
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
|
||||||
if (userRole == "App Owner") {
|
if (userRole == "App Owner") {
|
||||||
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
|
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
|
||||||
|
@ -343,6 +348,36 @@ export const userSpendLogsCall = async (
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const adminSpendLogsCall = async (accessToken: String) => {
|
||||||
|
try {
|
||||||
|
let url = proxyBaseUrl
|
||||||
|
? `${proxyBaseUrl}/global/spend/logs`
|
||||||
|
: `/global/spend/logs`;
|
||||||
|
|
||||||
|
message.info("Making spend logs request");
|
||||||
|
const response = await fetch(url, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorData = await response.text();
|
||||||
|
message.error(errorData);
|
||||||
|
throw new Error("Network response was not ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
console.log(data);
|
||||||
|
message.success("Spend Logs received");
|
||||||
|
return data;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Failed to create key:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
||||||
try {
|
try {
|
||||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
||||||
|
|
|
@ -2,7 +2,11 @@ import { BarChart, Card, Title } from "@tremor/react";
|
||||||
|
|
||||||
import React, { useState, useEffect } from "react";
|
import React, { useState, useEffect } from "react";
|
||||||
import { Grid, Col, Text, LineChart } from "@tremor/react";
|
import { Grid, Col, Text, LineChart } from "@tremor/react";
|
||||||
import { userSpendLogsCall, keyInfoCall } from "./networking";
|
import {
|
||||||
|
userSpendLogsCall,
|
||||||
|
keyInfoCall,
|
||||||
|
adminSpendLogsCall,
|
||||||
|
} from "./networking";
|
||||||
import { start } from "repl";
|
import { start } from "repl";
|
||||||
|
|
||||||
interface UsagePageProps {
|
interface UsagePageProps {
|
||||||
|
@ -175,27 +179,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
console.log("result from spend logs call", response);
|
console.log("result from spend logs call", response);
|
||||||
if ("daily_spend" in response) {
|
if ("daily_spend" in response) {
|
||||||
// this is from clickhouse analytics
|
// this is from clickhouse analytics
|
||||||
//
|
//
|
||||||
let daily_spend = response["daily_spend"];
|
let daily_spend = response["daily_spend"];
|
||||||
console.log("daily spend", daily_spend);
|
console.log("daily spend", daily_spend);
|
||||||
setKeySpendData(daily_spend);
|
setKeySpendData(daily_spend);
|
||||||
let topApiKeys = response.top_api_keys;
|
let topApiKeys = response.top_api_keys;
|
||||||
setTopKeys(topApiKeys);
|
setTopKeys(topApiKeys);
|
||||||
}
|
} else {
|
||||||
else {
|
// const topKeysResponse = await keyInfoCall(
|
||||||
const topKeysResponse = await keyInfoCall(
|
// accessToken,
|
||||||
accessToken,
|
// getTopKeys(response)
|
||||||
getTopKeys(response)
|
// );
|
||||||
);
|
// const filtered_keys = topKeysResponse["info"].map((k: any) => ({
|
||||||
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
|
// key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
|
||||||
key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
|
// 0,
|
||||||
0,
|
// 7
|
||||||
7
|
// ),
|
||||||
),
|
// spend: k["spend"],
|
||||||
spend: k["spend"],
|
// }));
|
||||||
}));
|
// setTopKeys(filtered_keys);
|
||||||
setTopKeys(filtered_keys);
|
// setTopUsers(getTopUsers(response));
|
||||||
setTopUsers(getTopUsers(response));
|
|
||||||
setKeySpendData(response);
|
setKeySpendData(response);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -222,7 +225,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
valueFormatter={valueFormatter}
|
valueFormatter={valueFormatter}
|
||||||
yAxisWidth={100}
|
yAxisWidth={100}
|
||||||
tickGap={5}
|
tickGap={5}
|
||||||
customTooltip={customTooltip}
|
// customTooltip={customTooltip}
|
||||||
/>
|
/>
|
||||||
</Card>
|
</Card>
|
||||||
</Col>
|
</Col>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue