mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 19:54:13 +00:00
[Feat - Cost Tracking improvement] Track prompt caching metrics in DailyUserSpendTransactions (#10029)
* stash changes
* emit cache read/write tokens to daily spend update
* emit cache read/write tokens on daily activity
* update types.ts
* docs prompt caching
* undo ui change
* fix activity metrics
* fix prompt caching metrics
* fix typed dict fields
* fix get_aggregated_daily_spend_update_transactions
* fix aggregating cache tokens
* test_cache_token_fields_aggregation
* daily_transaction
* add cache_creation_input_tokens and cache_read_input_tokens to LiteLLM_DailyUserSpend
* test_daily_spend_update_queue.py
This commit is contained in:
parent
1c3bfc4856
commit
3aed97e63e
12 changed files with 197 additions and 35 deletions
|
@ -0,0 +1,4 @@
|
||||||
|
-- AlterTable
|
||||||
|
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "cache_creation_input_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
ADD COLUMN "cache_read_input_tokens" INTEGER NOT NULL DEFAULT 0;
|
||||||
|
|
|
@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
|
||||||
custom_llm_provider String?
|
custom_llm_provider String?
|
||||||
prompt_tokens Int @default(0)
|
prompt_tokens Int @default(0)
|
||||||
completion_tokens Int @default(0)
|
completion_tokens Int @default(0)
|
||||||
|
cache_read_input_tokens Int @default(0)
|
||||||
|
cache_creation_input_tokens Int @default(0)
|
||||||
spend Float @default(0.0)
|
spend Float @default(0.0)
|
||||||
api_requests Int @default(0)
|
api_requests Int @default(0)
|
||||||
successful_requests Int @default(0)
|
successful_requests Int @default(0)
|
||||||
|
|
|
@ -2777,8 +2777,14 @@ class BaseDailySpendTransaction(TypedDict):
|
||||||
model: str
|
model: str
|
||||||
model_group: Optional[str]
|
model_group: Optional[str]
|
||||||
custom_llm_provider: Optional[str]
|
custom_llm_provider: Optional[str]
|
||||||
|
|
||||||
|
# token count metrics
|
||||||
prompt_tokens: int
|
prompt_tokens: int
|
||||||
completion_tokens: int
|
completion_tokens: int
|
||||||
|
cache_read_input_tokens: int
|
||||||
|
cache_creation_input_tokens: int
|
||||||
|
|
||||||
|
# request level metrics
|
||||||
spend: float
|
spend: float
|
||||||
api_requests: int
|
api_requests: int
|
||||||
successful_requests: int
|
successful_requests: int
|
||||||
|
|
|
@ -6,6 +6,7 @@ Module responsible for
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -24,6 +25,7 @@ from litellm.proxy._types import (
|
||||||
DBSpendUpdateTransactions,
|
DBSpendUpdateTransactions,
|
||||||
Litellm_EntityType,
|
Litellm_EntityType,
|
||||||
LiteLLM_UserTable,
|
LiteLLM_UserTable,
|
||||||
|
SpendLogsMetadata,
|
||||||
SpendLogsPayload,
|
SpendLogsPayload,
|
||||||
SpendUpdateQueueItem,
|
SpendUpdateQueueItem,
|
||||||
)
|
)
|
||||||
|
@ -806,6 +808,12 @@ class DBSpendUpdateWriter:
|
||||||
"completion_tokens": transaction[
|
"completion_tokens": transaction[
|
||||||
"completion_tokens"
|
"completion_tokens"
|
||||||
],
|
],
|
||||||
|
"cache_read_input_tokens": transaction.get(
|
||||||
|
"cache_read_input_tokens", 0
|
||||||
|
),
|
||||||
|
"cache_creation_input_tokens": transaction.get(
|
||||||
|
"cache_creation_input_tokens", 0
|
||||||
|
),
|
||||||
"spend": transaction["spend"],
|
"spend": transaction["spend"],
|
||||||
"api_requests": transaction["api_requests"],
|
"api_requests": transaction["api_requests"],
|
||||||
"successful_requests": transaction[
|
"successful_requests": transaction[
|
||||||
|
@ -824,6 +832,16 @@ class DBSpendUpdateWriter:
|
||||||
"completion_tokens"
|
"completion_tokens"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"cache_read_input_tokens": {
|
||||||
|
"increment": transaction.get(
|
||||||
|
"cache_read_input_tokens", 0
|
||||||
|
)
|
||||||
|
},
|
||||||
|
"cache_creation_input_tokens": {
|
||||||
|
"increment": transaction.get(
|
||||||
|
"cache_creation_input_tokens", 0
|
||||||
|
)
|
||||||
|
},
|
||||||
"spend": {"increment": transaction["spend"]},
|
"spend": {"increment": transaction["spend"]},
|
||||||
"api_requests": {
|
"api_requests": {
|
||||||
"increment": transaction["api_requests"]
|
"increment": transaction["api_requests"]
|
||||||
|
@ -1024,6 +1042,8 @@ class DBSpendUpdateWriter:
|
||||||
|
|
||||||
request_status = prisma_client.get_request_status(payload)
|
request_status = prisma_client.get_request_status(payload)
|
||||||
verbose_proxy_logger.info(f"Logged request status: {request_status}")
|
verbose_proxy_logger.info(f"Logged request status: {request_status}")
|
||||||
|
_metadata: SpendLogsMetadata = json.loads(payload["metadata"])
|
||||||
|
usage_obj = _metadata.get("usage_object", {}) or {}
|
||||||
if isinstance(payload["startTime"], datetime):
|
if isinstance(payload["startTime"], datetime):
|
||||||
start_time = payload["startTime"].isoformat()
|
start_time = payload["startTime"].isoformat()
|
||||||
date = start_time.split("T")[0]
|
date = start_time.split("T")[0]
|
||||||
|
@ -1047,6 +1067,12 @@ class DBSpendUpdateWriter:
|
||||||
api_requests=1,
|
api_requests=1,
|
||||||
successful_requests=1 if request_status == "success" else 0,
|
successful_requests=1 if request_status == "success" else 0,
|
||||||
failed_requests=1 if request_status != "success" else 0,
|
failed_requests=1 if request_status != "success" else 0,
|
||||||
|
cache_read_input_tokens=usage_obj.get("cache_read_input_tokens", 0)
|
||||||
|
or 0,
|
||||||
|
cache_creation_input_tokens=usage_obj.get(
|
||||||
|
"cache_creation_input_tokens", 0
|
||||||
|
)
|
||||||
|
or 0,
|
||||||
)
|
)
|
||||||
return daily_transaction
|
return daily_transaction
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.update_queue: asyncio.Queue[
|
self.update_queue: asyncio.Queue[Dict[str, BaseDailySpendTransaction]] = (
|
||||||
Dict[str, BaseDailySpendTransaction]
|
asyncio.Queue()
|
||||||
] = asyncio.Queue()
|
)
|
||||||
|
|
||||||
async def add_update(self, update: Dict[str, BaseDailySpendTransaction]):
|
async def add_update(self, update: Dict[str, BaseDailySpendTransaction]):
|
||||||
"""Enqueue an update."""
|
"""Enqueue an update."""
|
||||||
|
@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
Combine all updates in the queue into a single update.
|
Combine all updates in the queue into a single update.
|
||||||
This is used to reduce the size of the in-memory queue.
|
This is used to reduce the size of the in-memory queue.
|
||||||
"""
|
"""
|
||||||
updates: List[
|
updates: List[Dict[str, BaseDailySpendTransaction]] = (
|
||||||
Dict[str, BaseDailySpendTransaction]
|
await self.flush_all_updates_from_in_memory_queue()
|
||||||
] = await self.flush_all_updates_from_in_memory_queue()
|
)
|
||||||
aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
|
aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
|
||||||
updates
|
updates
|
||||||
)
|
)
|
||||||
|
@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_aggregated_daily_spend_update_transactions(
|
def get_aggregated_daily_spend_update_transactions(
|
||||||
updates: List[Dict[str, BaseDailySpendTransaction]]
|
updates: List[Dict[str, BaseDailySpendTransaction]],
|
||||||
) -> Dict[str, BaseDailySpendTransaction]:
|
) -> Dict[str, BaseDailySpendTransaction]:
|
||||||
"""Aggregate updates by daily_transaction_key."""
|
"""Aggregate updates by daily_transaction_key."""
|
||||||
aggregated_daily_spend_update_transactions: Dict[
|
aggregated_daily_spend_update_transactions: Dict[
|
||||||
|
@ -118,6 +118,16 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
"successful_requests"
|
"successful_requests"
|
||||||
]
|
]
|
||||||
daily_transaction["failed_requests"] += payload["failed_requests"]
|
daily_transaction["failed_requests"] += payload["failed_requests"]
|
||||||
|
|
||||||
|
# Add optional metrics cache_read_input_tokens and cache_creation_input_tokens
|
||||||
|
daily_transaction["cache_read_input_tokens"] = (
|
||||||
|
payload.get("cache_read_input_tokens", 0) or 0
|
||||||
|
) + daily_transaction.get("cache_read_input_tokens", 0)
|
||||||
|
|
||||||
|
daily_transaction["cache_creation_input_tokens"] = (
|
||||||
|
payload.get("cache_creation_input_tokens", 0) or 0
|
||||||
|
) + daily_transaction.get("cache_creation_input_tokens", 0)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
|
aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
|
||||||
return aggregated_daily_spend_update_transactions
|
return aggregated_daily_spend_update_transactions
|
||||||
|
|
|
@ -82,9 +82,9 @@ def _update_internal_new_user_params(data_json: dict, data: NewUserRequest) -> d
|
||||||
data_json["user_id"] = str(uuid.uuid4())
|
data_json["user_id"] = str(uuid.uuid4())
|
||||||
auto_create_key = data_json.pop("auto_create_key", True)
|
auto_create_key = data_json.pop("auto_create_key", True)
|
||||||
if auto_create_key is False:
|
if auto_create_key is False:
|
||||||
data_json[
|
data_json["table_name"] = (
|
||||||
"table_name"
|
"user" # only create a user, don't create key if 'auto_create_key' set to False
|
||||||
] = "user" # only create a user, don't create key if 'auto_create_key' set to False
|
)
|
||||||
|
|
||||||
is_internal_user = False
|
is_internal_user = False
|
||||||
if data.user_role and data.user_role.is_internal_user_role:
|
if data.user_role and data.user_role.is_internal_user_role:
|
||||||
|
@ -651,9 +651,9 @@ def _update_internal_user_params(data_json: dict, data: UpdateUserRequest) -> di
|
||||||
"budget_duration" not in non_default_values
|
"budget_duration" not in non_default_values
|
||||||
): # applies internal user limits, if user role updated
|
): # applies internal user limits, if user role updated
|
||||||
if is_internal_user and litellm.internal_user_budget_duration is not None:
|
if is_internal_user and litellm.internal_user_budget_duration is not None:
|
||||||
non_default_values[
|
non_default_values["budget_duration"] = (
|
||||||
"budget_duration"
|
litellm.internal_user_budget_duration
|
||||||
] = litellm.internal_user_budget_duration
|
)
|
||||||
duration_s = duration_in_seconds(
|
duration_s = duration_in_seconds(
|
||||||
duration=non_default_values["budget_duration"]
|
duration=non_default_values["budget_duration"]
|
||||||
)
|
)
|
||||||
|
@ -964,14 +964,14 @@ async def get_users(
|
||||||
"in": user_id_list, # Now passing a list of strings as required by Prisma
|
"in": user_id_list, # Now passing a list of strings as required by Prisma
|
||||||
}
|
}
|
||||||
|
|
||||||
users: Optional[
|
users: Optional[List[LiteLLM_UserTable]] = (
|
||||||
List[LiteLLM_UserTable]
|
await prisma_client.db.litellm_usertable.find_many(
|
||||||
] = await prisma_client.db.litellm_usertable.find_many(
|
|
||||||
where=where_conditions,
|
where=where_conditions,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
take=page_size,
|
take=page_size,
|
||||||
order={"created_at": "desc"},
|
order={"created_at": "desc"},
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Get total count of user rows
|
# Get total count of user rows
|
||||||
total_count = await prisma_client.db.litellm_usertable.count(
|
total_count = await prisma_client.db.litellm_usertable.count(
|
||||||
|
@ -1225,14 +1225,14 @@ async def ui_view_users(
|
||||||
}
|
}
|
||||||
|
|
||||||
# Query users with pagination and filters
|
# Query users with pagination and filters
|
||||||
users: Optional[
|
users: Optional[List[BaseModel]] = (
|
||||||
List[BaseModel]
|
await prisma_client.db.litellm_usertable.find_many(
|
||||||
] = await prisma_client.db.litellm_usertable.find_many(
|
|
||||||
where=where_conditions,
|
where=where_conditions,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
take=page_size,
|
take=page_size,
|
||||||
order={"created_at": "desc"},
|
order={"created_at": "desc"},
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if not users:
|
if not users:
|
||||||
return []
|
return []
|
||||||
|
@ -1258,6 +1258,8 @@ class SpendMetrics(BaseModel):
|
||||||
spend: float = Field(default=0.0)
|
spend: float = Field(default=0.0)
|
||||||
prompt_tokens: int = Field(default=0)
|
prompt_tokens: int = Field(default=0)
|
||||||
completion_tokens: int = Field(default=0)
|
completion_tokens: int = Field(default=0)
|
||||||
|
cache_read_input_tokens: int = Field(default=0)
|
||||||
|
cache_creation_input_tokens: int = Field(default=0)
|
||||||
total_tokens: int = Field(default=0)
|
total_tokens: int = Field(default=0)
|
||||||
successful_requests: int = Field(default=0)
|
successful_requests: int = Field(default=0)
|
||||||
failed_requests: int = Field(default=0)
|
failed_requests: int = Field(default=0)
|
||||||
|
@ -1312,6 +1314,8 @@ class DailySpendMetadata(BaseModel):
|
||||||
total_api_requests: int = Field(default=0)
|
total_api_requests: int = Field(default=0)
|
||||||
total_successful_requests: int = Field(default=0)
|
total_successful_requests: int = Field(default=0)
|
||||||
total_failed_requests: int = Field(default=0)
|
total_failed_requests: int = Field(default=0)
|
||||||
|
total_cache_read_input_tokens: int = Field(default=0)
|
||||||
|
total_cache_creation_input_tokens: int = Field(default=0)
|
||||||
page: int = Field(default=1)
|
page: int = Field(default=1)
|
||||||
total_pages: int = Field(default=1)
|
total_pages: int = Field(default=1)
|
||||||
has_more: bool = Field(default=False)
|
has_more: bool = Field(default=False)
|
||||||
|
@ -1332,6 +1336,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
|
||||||
custom_llm_provider: Optional[str] = None
|
custom_llm_provider: Optional[str] = None
|
||||||
prompt_tokens: int = 0
|
prompt_tokens: int = 0
|
||||||
completion_tokens: int = 0
|
completion_tokens: int = 0
|
||||||
|
cache_read_input_tokens: int = 0
|
||||||
|
cache_creation_input_tokens: int = 0
|
||||||
spend: float = 0.0
|
spend: float = 0.0
|
||||||
api_requests: int = 0
|
api_requests: int = 0
|
||||||
successful_requests: int = 0
|
successful_requests: int = 0
|
||||||
|
@ -1349,6 +1355,8 @@ def update_metrics(
|
||||||
group_metrics.spend += record.spend
|
group_metrics.spend += record.spend
|
||||||
group_metrics.prompt_tokens += record.prompt_tokens
|
group_metrics.prompt_tokens += record.prompt_tokens
|
||||||
group_metrics.completion_tokens += record.completion_tokens
|
group_metrics.completion_tokens += record.completion_tokens
|
||||||
|
group_metrics.cache_read_input_tokens += record.cache_read_input_tokens
|
||||||
|
group_metrics.cache_creation_input_tokens += record.cache_creation_input_tokens
|
||||||
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
|
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
|
||||||
group_metrics.api_requests += record.api_requests
|
group_metrics.api_requests += record.api_requests
|
||||||
group_metrics.successful_requests += record.successful_requests
|
group_metrics.successful_requests += record.successful_requests
|
||||||
|
@ -1448,6 +1456,8 @@ async def get_user_daily_activity(
|
||||||
- spend
|
- spend
|
||||||
- prompt_tokens
|
- prompt_tokens
|
||||||
- completion_tokens
|
- completion_tokens
|
||||||
|
- cache_read_input_tokens
|
||||||
|
- cache_creation_input_tokens
|
||||||
- total_tokens
|
- total_tokens
|
||||||
- api_requests
|
- api_requests
|
||||||
- breakdown by model, api_key, provider
|
- breakdown by model, api_key, provider
|
||||||
|
@ -1484,9 +1494,9 @@ async def get_user_daily_activity(
|
||||||
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
|
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
|
||||||
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
|
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
|
||||||
):
|
):
|
||||||
where_conditions[
|
where_conditions["user_id"] = (
|
||||||
"user_id"
|
user_api_key_dict.user_id
|
||||||
] = user_api_key_dict.user_id # only allow access to own data
|
) # only allow access to own data
|
||||||
|
|
||||||
# Get total count for pagination
|
# Get total count for pagination
|
||||||
total_count = await prisma_client.db.litellm_dailyuserspend.count(
|
total_count = await prisma_client.db.litellm_dailyuserspend.count(
|
||||||
|
@ -1560,6 +1570,10 @@ async def get_user_daily_activity(
|
||||||
total_metrics.total_tokens += (
|
total_metrics.total_tokens += (
|
||||||
record.prompt_tokens + record.completion_tokens
|
record.prompt_tokens + record.completion_tokens
|
||||||
)
|
)
|
||||||
|
total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
|
||||||
|
total_metrics.cache_creation_input_tokens += (
|
||||||
|
record.cache_creation_input_tokens
|
||||||
|
)
|
||||||
total_metrics.api_requests += record.api_requests
|
total_metrics.api_requests += record.api_requests
|
||||||
total_metrics.successful_requests += record.successful_requests
|
total_metrics.successful_requests += record.successful_requests
|
||||||
total_metrics.failed_requests += record.failed_requests
|
total_metrics.failed_requests += record.failed_requests
|
||||||
|
@ -1587,6 +1601,8 @@ async def get_user_daily_activity(
|
||||||
total_api_requests=total_metrics.api_requests,
|
total_api_requests=total_metrics.api_requests,
|
||||||
total_successful_requests=total_metrics.successful_requests,
|
total_successful_requests=total_metrics.successful_requests,
|
||||||
total_failed_requests=total_metrics.failed_requests,
|
total_failed_requests=total_metrics.failed_requests,
|
||||||
|
total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
|
||||||
|
total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
|
||||||
page=page,
|
page=page,
|
||||||
total_pages=-(-total_count // page_size), # Ceiling division
|
total_pages=-(-total_count // page_size), # Ceiling division
|
||||||
has_more=(page * page_size) < total_count,
|
has_more=(page * page_size) < total_count,
|
||||||
|
|
|
@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
|
||||||
custom_llm_provider String?
|
custom_llm_provider String?
|
||||||
prompt_tokens Int @default(0)
|
prompt_tokens Int @default(0)
|
||||||
completion_tokens Int @default(0)
|
completion_tokens Int @default(0)
|
||||||
|
cache_read_input_tokens Int @default(0)
|
||||||
|
cache_creation_input_tokens Int @default(0)
|
||||||
spend Float @default(0.0)
|
spend Float @default(0.0)
|
||||||
api_requests Int @default(0)
|
api_requests Int @default(0)
|
||||||
successful_requests Int @default(0)
|
successful_requests Int @default(0)
|
||||||
|
|
|
@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
|
||||||
custom_llm_provider String?
|
custom_llm_provider String?
|
||||||
prompt_tokens Int @default(0)
|
prompt_tokens Int @default(0)
|
||||||
completion_tokens Int @default(0)
|
completion_tokens Int @default(0)
|
||||||
|
cache_read_input_tokens Int @default(0)
|
||||||
|
cache_creation_input_tokens Int @default(0)
|
||||||
spend Float @default(0.0)
|
spend Float @default(0.0)
|
||||||
api_requests Int @default(0)
|
api_requests Int @default(0)
|
||||||
successful_requests Int @default(0)
|
successful_requests Int @default(0)
|
||||||
|
|
|
@ -204,6 +204,8 @@ async def test_get_aggregated_daily_spend_update_transactions_same_key():
|
||||||
"api_requests": 2, # 1 + 1
|
"api_requests": 2, # 1 + 1
|
||||||
"successful_requests": 2, # 1 + 1
|
"successful_requests": 2, # 1 + 1
|
||||||
"failed_requests": 0, # 0 + 0
|
"failed_requests": 0, # 0 + 0
|
||||||
|
"cache_creation_input_tokens": 0,
|
||||||
|
"cache_read_input_tokens": 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
|
updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
|
||||||
|
@ -249,6 +251,8 @@ async def test_flush_and_get_aggregated_daily_spend_update_transactions(
|
||||||
"api_requests": 2, # 1 + 1
|
"api_requests": 2, # 1 + 1
|
||||||
"successful_requests": 2, # 1 + 1
|
"successful_requests": 2, # 1 + 1
|
||||||
"failed_requests": 0, # 0 + 0
|
"failed_requests": 0, # 0 + 0
|
||||||
|
"cache_creation_input_tokens": 0,
|
||||||
|
"cache_read_input_tokens": 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add updates to queue
|
# Add updates to queue
|
||||||
|
@ -368,6 +372,48 @@ async def test_aggregate_queue_updates_accuracy(daily_spend_update_queue):
|
||||||
assert daily_spend_update_transactions[test_key3]["failed_requests"] == 0
|
assert daily_spend_update_transactions[test_key3]["failed_requests"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_cache_token_fields_aggregation(daily_spend_update_queue):
|
||||||
|
"""Test that cache_read_input_tokens and cache_creation_input_tokens are handled and aggregated correctly."""
|
||||||
|
test_key = "user1_2023-01-01_key123_gpt-4_openai"
|
||||||
|
transaction1 = {
|
||||||
|
"spend": 1.0,
|
||||||
|
"prompt_tokens": 10,
|
||||||
|
"completion_tokens": 5,
|
||||||
|
"api_requests": 1,
|
||||||
|
"successful_requests": 1,
|
||||||
|
"failed_requests": 0,
|
||||||
|
"cache_read_input_tokens": 7,
|
||||||
|
"cache_creation_input_tokens": 3,
|
||||||
|
}
|
||||||
|
transaction2 = {
|
||||||
|
"spend": 2.0,
|
||||||
|
"prompt_tokens": 20,
|
||||||
|
"completion_tokens": 10,
|
||||||
|
"api_requests": 1,
|
||||||
|
"successful_requests": 1,
|
||||||
|
"failed_requests": 0,
|
||||||
|
"cache_read_input_tokens": 5,
|
||||||
|
"cache_creation_input_tokens": 4,
|
||||||
|
}
|
||||||
|
# Add both updates
|
||||||
|
await daily_spend_update_queue.add_update({test_key: transaction1})
|
||||||
|
await daily_spend_update_queue.add_update({test_key: transaction2})
|
||||||
|
# Aggregate
|
||||||
|
await daily_spend_update_queue.aggregate_queue_updates()
|
||||||
|
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
|
||||||
|
assert len(updates) == 1
|
||||||
|
agg = updates[0][test_key]
|
||||||
|
assert agg["cache_read_input_tokens"] == 12 # 7 + 5
|
||||||
|
assert agg["cache_creation_input_tokens"] == 7 # 3 + 4
|
||||||
|
assert agg["spend"] == 3.0
|
||||||
|
assert agg["prompt_tokens"] == 30
|
||||||
|
assert agg["completion_tokens"] == 15
|
||||||
|
assert agg["api_requests"] == 2
|
||||||
|
assert agg["successful_requests"] == 2
|
||||||
|
assert agg["failed_requests"] == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_queue_size_reduction_with_large_volume(
|
async def test_queue_size_reduction_with_large_volume(
|
||||||
monkeypatch, daily_spend_update_queue
|
monkeypatch, daily_spend_update_queue
|
||||||
|
|
|
@ -79,6 +79,21 @@ const ModelSection = ({ modelName, metrics }: { modelName: string; metrics: Mode
|
||||||
stack
|
stack
|
||||||
/>
|
/>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
<Card>
|
||||||
|
<Title>Prompt Caching Metrics</Title>
|
||||||
|
<div className="mb-2">
|
||||||
|
<Text>Cache Read: {metrics.total_cache_read_input_tokens?.toLocaleString() || 0} tokens</Text>
|
||||||
|
<Text>Cache Creation: {metrics.total_cache_creation_input_tokens?.toLocaleString() || 0} tokens</Text>
|
||||||
|
</div>
|
||||||
|
<AreaChart
|
||||||
|
data={metrics.daily_data}
|
||||||
|
index="date"
|
||||||
|
categories={["metrics.cache_read_input_tokens", "metrics.cache_creation_input_tokens"]}
|
||||||
|
colors={["cyan", "purple"]}
|
||||||
|
valueFormatter={(number: number) => number.toLocaleString()}
|
||||||
|
/>
|
||||||
|
</Card>
|
||||||
</Grid>
|
</Grid>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
@ -97,6 +112,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
|
||||||
total_successful_requests: 0,
|
total_successful_requests: 0,
|
||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
total_spend: 0,
|
total_spend: 0,
|
||||||
|
total_cache_read_input_tokens: 0,
|
||||||
|
total_cache_creation_input_tokens: 0,
|
||||||
daily_data: {} as Record<string, {
|
daily_data: {} as Record<string, {
|
||||||
prompt_tokens: number;
|
prompt_tokens: number;
|
||||||
completion_tokens: number;
|
completion_tokens: number;
|
||||||
|
@ -105,6 +122,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
|
||||||
spend: number;
|
spend: number;
|
||||||
successful_requests: number;
|
successful_requests: number;
|
||||||
failed_requests: number;
|
failed_requests: number;
|
||||||
|
cache_read_input_tokens: number;
|
||||||
|
cache_creation_input_tokens: number;
|
||||||
}>
|
}>
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -114,6 +133,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
|
||||||
totalMetrics.total_successful_requests += model.total_successful_requests;
|
totalMetrics.total_successful_requests += model.total_successful_requests;
|
||||||
totalMetrics.total_tokens += model.total_tokens;
|
totalMetrics.total_tokens += model.total_tokens;
|
||||||
totalMetrics.total_spend += model.total_spend;
|
totalMetrics.total_spend += model.total_spend;
|
||||||
|
totalMetrics.total_cache_read_input_tokens += model.total_cache_read_input_tokens || 0;
|
||||||
|
totalMetrics.total_cache_creation_input_tokens += model.total_cache_creation_input_tokens || 0;
|
||||||
|
|
||||||
// Aggregate daily data
|
// Aggregate daily data
|
||||||
model.daily_data.forEach(day => {
|
model.daily_data.forEach(day => {
|
||||||
|
@ -125,7 +146,9 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
|
||||||
api_requests: 0,
|
api_requests: 0,
|
||||||
spend: 0,
|
spend: 0,
|
||||||
successful_requests: 0,
|
successful_requests: 0,
|
||||||
failed_requests: 0
|
failed_requests: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
totalMetrics.daily_data[day.date].prompt_tokens += day.metrics.prompt_tokens;
|
totalMetrics.daily_data[day.date].prompt_tokens += day.metrics.prompt_tokens;
|
||||||
|
@ -135,6 +158,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
|
||||||
totalMetrics.daily_data[day.date].spend += day.metrics.spend;
|
totalMetrics.daily_data[day.date].spend += day.metrics.spend;
|
||||||
totalMetrics.daily_data[day.date].successful_requests += day.metrics.successful_requests;
|
totalMetrics.daily_data[day.date].successful_requests += day.metrics.successful_requests;
|
||||||
totalMetrics.daily_data[day.date].failed_requests += day.metrics.failed_requests;
|
totalMetrics.daily_data[day.date].failed_requests += day.metrics.failed_requests;
|
||||||
|
totalMetrics.daily_data[day.date].cache_read_input_tokens += day.metrics.cache_read_input_tokens || 0;
|
||||||
|
totalMetrics.daily_data[day.date].cache_creation_input_tokens += day.metrics.cache_creation_input_tokens || 0;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -233,6 +258,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Re
|
||||||
prompt_tokens: 0,
|
prompt_tokens: 0,
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
total_spend: 0,
|
total_spend: 0,
|
||||||
|
total_cache_read_input_tokens: 0,
|
||||||
|
total_cache_creation_input_tokens: 0,
|
||||||
daily_data: []
|
daily_data: []
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -245,6 +272,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Re
|
||||||
modelMetrics[model].total_spend += modelData.metrics.spend;
|
modelMetrics[model].total_spend += modelData.metrics.spend;
|
||||||
modelMetrics[model].total_successful_requests += modelData.metrics.successful_requests;
|
modelMetrics[model].total_successful_requests += modelData.metrics.successful_requests;
|
||||||
modelMetrics[model].total_failed_requests += modelData.metrics.failed_requests;
|
modelMetrics[model].total_failed_requests += modelData.metrics.failed_requests;
|
||||||
|
modelMetrics[model].total_cache_read_input_tokens += modelData.metrics.cache_read_input_tokens || 0;
|
||||||
|
modelMetrics[model].total_cache_creation_input_tokens += modelData.metrics.cache_creation_input_tokens || 0;
|
||||||
|
|
||||||
// Add daily data
|
// Add daily data
|
||||||
modelMetrics[model].daily_data.push({
|
modelMetrics[model].daily_data.push({
|
||||||
|
@ -256,7 +285,9 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Re
|
||||||
api_requests: modelData.metrics.api_requests,
|
api_requests: modelData.metrics.api_requests,
|
||||||
spend: modelData.metrics.spend,
|
spend: modelData.metrics.spend,
|
||||||
successful_requests: modelData.metrics.successful_requests,
|
successful_requests: modelData.metrics.successful_requests,
|
||||||
failed_requests: modelData.metrics.failed_requests
|
failed_requests: modelData.metrics.failed_requests,
|
||||||
|
cache_read_input_tokens: modelData.metrics.cache_read_input_tokens || 0,
|
||||||
|
cache_creation_input_tokens: modelData.metrics.cache_creation_input_tokens || 0
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -62,7 +62,9 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
api_requests: 0,
|
api_requests: 0,
|
||||||
successful_requests: 0,
|
successful_requests: 0,
|
||||||
failed_requests: 0
|
failed_requests: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0
|
||||||
},
|
},
|
||||||
metadata: {}
|
metadata: {}
|
||||||
};
|
};
|
||||||
|
@ -74,6 +76,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
modelSpend[model].metrics.api_requests += metrics.metrics.api_requests;
|
modelSpend[model].metrics.api_requests += metrics.metrics.api_requests;
|
||||||
modelSpend[model].metrics.successful_requests += metrics.metrics.successful_requests || 0;
|
modelSpend[model].metrics.successful_requests += metrics.metrics.successful_requests || 0;
|
||||||
modelSpend[model].metrics.failed_requests += metrics.metrics.failed_requests || 0;
|
modelSpend[model].metrics.failed_requests += metrics.metrics.failed_requests || 0;
|
||||||
|
modelSpend[model].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
|
||||||
|
modelSpend[model].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -104,7 +108,9 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
api_requests: 0,
|
api_requests: 0,
|
||||||
successful_requests: 0,
|
successful_requests: 0,
|
||||||
failed_requests: 0
|
failed_requests: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0
|
||||||
},
|
},
|
||||||
metadata: {}
|
metadata: {}
|
||||||
};
|
};
|
||||||
|
@ -116,6 +122,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
providerSpend[provider].metrics.api_requests += metrics.metrics.api_requests;
|
providerSpend[provider].metrics.api_requests += metrics.metrics.api_requests;
|
||||||
providerSpend[provider].metrics.successful_requests += metrics.metrics.successful_requests || 0;
|
providerSpend[provider].metrics.successful_requests += metrics.metrics.successful_requests || 0;
|
||||||
providerSpend[provider].metrics.failed_requests += metrics.metrics.failed_requests || 0;
|
providerSpend[provider].metrics.failed_requests += metrics.metrics.failed_requests || 0;
|
||||||
|
providerSpend[provider].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
|
||||||
|
providerSpend[provider].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -145,6 +153,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
api_requests: 0,
|
api_requests: 0,
|
||||||
successful_requests: 0,
|
successful_requests: 0,
|
||||||
failed_requests: 0,
|
failed_requests: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0
|
||||||
},
|
},
|
||||||
metadata: {
|
metadata: {
|
||||||
key_alias: metrics.metadata.key_alias
|
key_alias: metrics.metadata.key_alias
|
||||||
|
@ -158,6 +168,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
keySpend[key].metrics.api_requests += metrics.metrics.api_requests;
|
keySpend[key].metrics.api_requests += metrics.metrics.api_requests;
|
||||||
keySpend[key].metrics.successful_requests += metrics.metrics.successful_requests;
|
keySpend[key].metrics.successful_requests += metrics.metrics.successful_requests;
|
||||||
keySpend[key].metrics.failed_requests += metrics.metrics.failed_requests;
|
keySpend[key].metrics.failed_requests += metrics.metrics.failed_requests;
|
||||||
|
keySpend[key].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
|
||||||
|
keySpend[key].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -325,7 +337,6 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
|
||||||
/>
|
/>
|
||||||
</Card>
|
</Card>
|
||||||
</Col>
|
</Col>
|
||||||
|
|
||||||
{/* Top API Keys */}
|
{/* Top API Keys */}
|
||||||
<Col numColSpan={1}>
|
<Col numColSpan={1}>
|
||||||
<Card className="h-full">
|
<Card className="h-full">
|
||||||
|
|
|
@ -6,6 +6,8 @@ export interface SpendMetrics {
|
||||||
api_requests: number;
|
api_requests: number;
|
||||||
successful_requests: number;
|
successful_requests: number;
|
||||||
failed_requests: number;
|
failed_requests: number;
|
||||||
|
cache_read_input_tokens: number;
|
||||||
|
cache_creation_input_tokens: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DailyData {
|
export interface DailyData {
|
||||||
|
@ -36,6 +38,8 @@ export interface ModelActivityData {
|
||||||
total_requests: number;
|
total_requests: number;
|
||||||
total_successful_requests: number;
|
total_successful_requests: number;
|
||||||
total_failed_requests: number;
|
total_failed_requests: number;
|
||||||
|
total_cache_read_input_tokens: number;
|
||||||
|
total_cache_creation_input_tokens: number;
|
||||||
total_tokens: number;
|
total_tokens: number;
|
||||||
prompt_tokens: number;
|
prompt_tokens: number;
|
||||||
completion_tokens: number;
|
completion_tokens: number;
|
||||||
|
@ -50,6 +54,8 @@ export interface ModelActivityData {
|
||||||
spend: number;
|
spend: number;
|
||||||
successful_requests: number;
|
successful_requests: number;
|
||||||
failed_requests: number;
|
failed_requests: number;
|
||||||
|
cache_read_input_tokens: number;
|
||||||
|
cache_creation_input_tokens: number;
|
||||||
};
|
};
|
||||||
}[];
|
}[];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue