[Feat - Cost Tracking improvement] Track prompt caching metrics in DailyUserSpendTransactions (#10029)

* stash changes

* emit cache read/write tokens to daily spend update

* emit cache read/write tokens on daily activity

* update types.ts

* docs prompt caching

* undo ui change

* fix activity metrics

* fix prompt caching metrics

* fix typed dict fields

* fix get_aggregated_daily_spend_update_transactions

* fix aggregating cache tokens

* test_cache_token_fields_aggregation

* daily_transaction

* add cache_creation_input_tokens and cache_read_input_tokens to LiteLLM_DailyUserSpend

* test_daily_spend_update_queue.py
Ishaan Jaff 2025-04-15 21:40:57 -07:00 committed by GitHub
parent 1c3bfc4856
commit 3aed97e63e
12 changed files with 197 additions and 35 deletions

View file

@@ -0,0 +1,4 @@
+-- AlterTable
+ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "cache_creation_input_tokens" INTEGER NOT NULL DEFAULT 0,
+ADD COLUMN "cache_read_input_tokens" INTEGER NOT NULL DEFAULT 0;

View file

@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int     @default(0)
   completion_tokens           Int     @default(0)
+  cache_read_input_tokens     Int     @default(0)
+  cache_creation_input_tokens Int     @default(0)
   spend                       Float   @default(0.0)
   api_requests                Int     @default(0)
   successful_requests         Int     @default(0)

View file

@@ -2777,8 +2777,14 @@ class BaseDailySpendTransaction(TypedDict):
     model: str
     model_group: Optional[str]
     custom_llm_provider: Optional[str]
+    # token count metrics
     prompt_tokens: int
     completion_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+    # request level metrics
     spend: float
     api_requests: int
     successful_requests: int

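The two new token fields are read with `transaction.get(..., 0)` later in this commit, since transactions queued before the upgrade won't carry them. A minimal sketch of that compatibility behavior, trimmed to fields shown in the hunk above (`total=False` and the sketch class name are assumptions made here so an older payload type-checks; the real class may differ):

```python
from typing import TypedDict


class DailySpendTransactionSketch(TypedDict, total=False):
    # Trimmed, illustrative stand-in for BaseDailySpendTransaction.
    prompt_tokens: int
    completion_tokens: int
    cache_read_input_tokens: int
    cache_creation_input_tokens: int
    spend: float


# A payload enqueued before this commit: no cache-token keys at all.
old_payload: DailySpendTransactionSketch = {
    "prompt_tokens": 10,
    "completion_tokens": 5,
    "spend": 0.01,
}

# Defensive read, mirroring DBSpendUpdateWriter: missing keys default to 0.
assert old_payload.get("cache_read_input_tokens", 0) == 0
assert old_payload.get("cache_creation_input_tokens", 0) == 0
```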
View file

@@ -6,6 +6,7 @@ Module responsible for
 """
 
 import asyncio
+import json
 import os
 import time
 import traceback
@@ -24,6 +25,7 @@ from litellm.proxy._types import (
     DBSpendUpdateTransactions,
     Litellm_EntityType,
     LiteLLM_UserTable,
+    SpendLogsMetadata,
     SpendLogsPayload,
     SpendUpdateQueueItem,
 )
@@ -806,6 +808,12 @@ class DBSpendUpdateWriter:
                                 "completion_tokens": transaction[
                                     "completion_tokens"
                                 ],
+                                "cache_read_input_tokens": transaction.get(
+                                    "cache_read_input_tokens", 0
+                                ),
+                                "cache_creation_input_tokens": transaction.get(
+                                    "cache_creation_input_tokens", 0
+                                ),
                                 "spend": transaction["spend"],
                                 "api_requests": transaction["api_requests"],
                                 "successful_requests": transaction[
@@ -824,6 +832,16 @@ class DBSpendUpdateWriter:
                                     "completion_tokens"
                                 ]
                             },
+                            "cache_read_input_tokens": {
+                                "increment": transaction.get(
+                                    "cache_read_input_tokens", 0
+                                )
+                            },
+                            "cache_creation_input_tokens": {
+                                "increment": transaction.get(
+                                    "cache_creation_input_tokens", 0
+                                )
+                            },
                             "spend": {"increment": transaction["spend"]},
                             "api_requests": {
                                 "increment": transaction["api_requests"]
@@ -1024,6 +1042,8 @@ class DBSpendUpdateWriter:
         request_status = prisma_client.get_request_status(payload)
         verbose_proxy_logger.info(f"Logged request status: {request_status}")
 
+        _metadata: SpendLogsMetadata = json.loads(payload["metadata"])
+        usage_obj = _metadata.get("usage_object", {}) or {}
         if isinstance(payload["startTime"], datetime):
             start_time = payload["startTime"].isoformat()
             date = start_time.split("T")[0]
@@ -1047,6 +1067,12 @@ class DBSpendUpdateWriter:
             api_requests=1,
             successful_requests=1 if request_status == "success" else 0,
             failed_requests=1 if request_status != "success" else 0,
+            cache_read_input_tokens=usage_obj.get("cache_read_input_tokens", 0)
+            or 0,
+            cache_creation_input_tokens=usage_obj.get(
+                "cache_creation_input_tokens", 0
+            )
+            or 0,
         )
         return daily_transaction
     except Exception as e:

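One detail in the hunk above that is easy to miss: the writer applies both a `.get(..., 0)` default and a trailing `or 0`. The usage object comes out of the spend log's JSON-encoded metadata, where a provider can report the cache fields as explicit nulls rather than omitting them, and `.get` alone would pass `None` through. A standalone sketch of the guard (the payload dict here is illustrative, not a real SpendLogsPayload):

```python
import json

# Illustrative spend-log row: metadata is stored as a JSON string, and the
# provider may report the cache fields as null rather than omitting them.
payload = {"metadata": json.dumps({"usage_object": {"cache_read_input_tokens": None}})}

_metadata = json.loads(payload["metadata"])
usage_obj = _metadata.get("usage_object", {}) or {}

# `.get(..., 0)` alone would return None here; the `or 0` coerces it to 0.
cache_read = usage_obj.get("cache_read_input_tokens", 0) or 0
cache_creation = usage_obj.get("cache_creation_input_tokens", 0) or 0
assert (cache_read, cache_creation) == (0, 0)
```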
View file

@@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
     def __init__(self):
         super().__init__()
-        self.update_queue: asyncio.Queue[
-            Dict[str, BaseDailySpendTransaction]
-        ] = asyncio.Queue()
+        self.update_queue: asyncio.Queue[Dict[str, BaseDailySpendTransaction]] = (
+            asyncio.Queue()
+        )
 
     async def add_update(self, update: Dict[str, BaseDailySpendTransaction]):
         """Enqueue an update."""
@@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
         Combine all updates in the queue into a single update.
         This is used to reduce the size of the in-memory queue.
         """
-        updates: List[
-            Dict[str, BaseDailySpendTransaction]
-        ] = await self.flush_all_updates_from_in_memory_queue()
+        updates: List[Dict[str, BaseDailySpendTransaction]] = (
+            await self.flush_all_updates_from_in_memory_queue()
+        )
         aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
             updates
         )
@@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
     @staticmethod
     def get_aggregated_daily_spend_update_transactions(
-        updates: List[Dict[str, BaseDailySpendTransaction]]
+        updates: List[Dict[str, BaseDailySpendTransaction]],
     ) -> Dict[str, BaseDailySpendTransaction]:
         """Aggregate updates by daily_transaction_key."""
         aggregated_daily_spend_update_transactions: Dict[
@@ -118,6 +118,16 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
                     "successful_requests"
                 ]
                 daily_transaction["failed_requests"] += payload["failed_requests"]
+                # Add optional metrics cache_read_input_tokens and cache_creation_input_tokens
+                daily_transaction["cache_read_input_tokens"] = (
+                    payload.get("cache_read_input_tokens", 0) or 0
+                ) + daily_transaction.get("cache_read_input_tokens", 0)
+                daily_transaction["cache_creation_input_tokens"] = (
+                    payload.get("cache_creation_input_tokens", 0) or 0
+                ) + daily_transaction.get("cache_creation_input_tokens", 0)
             else:
                 aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
         return aggregated_daily_spend_update_transactions

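The aggregation key groups updates per user, date, API key, model, and provider, so repeated updates to the same key collapse into a single row increment before the DB write. A reduced sketch of the merge rule for the new cache fields, runnable on plain dicts standing in for BaseDailySpendTransaction (the key string follows the format used in the tests further down; everything else is trimmed):

```python
from copy import deepcopy
from typing import Dict, List


def aggregate(updates: List[Dict[str, dict]]) -> Dict[str, dict]:
    # Reduced version of get_aggregated_daily_spend_update_transactions:
    # sum the cache-token counters per daily_transaction_key, treating
    # missing or None values as 0 so pre-upgrade payloads merge cleanly.
    out: Dict[str, dict] = {}
    for update in updates:
        for key, payload in update.items():
            if key not in out:
                out[key] = deepcopy(payload)
                continue
            for field in ("cache_read_input_tokens", "cache_creation_input_tokens"):
                out[key][field] = (payload.get(field, 0) or 0) + out[key].get(field, 0)
    return out


key = "user1_2023-01-01_key123_gpt-4_openai"
merged = aggregate([
    {key: {"cache_read_input_tokens": 7, "cache_creation_input_tokens": 3}},
    {key: {"cache_read_input_tokens": 5}},  # pre-upgrade payload, one field missing
])
assert merged[key]["cache_read_input_tokens"] == 12
assert merged[key]["cache_creation_input_tokens"] == 3
```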
View file

@@ -82,9 +82,9 @@ def _update_internal_new_user_params(data_json: dict, data: NewUserRequest) -> dict:
         data_json["user_id"] = str(uuid.uuid4())
     auto_create_key = data_json.pop("auto_create_key", True)
     if auto_create_key is False:
-        data_json[
-            "table_name"
-        ] = "user"  # only create a user, don't create key if 'auto_create_key' set to False
+        data_json["table_name"] = (
+            "user"  # only create a user, don't create key if 'auto_create_key' set to False
+        )
 
     is_internal_user = False
     if data.user_role and data.user_role.is_internal_user_role:
@@ -651,9 +651,9 @@ def _update_internal_user_params(data_json: dict, data: UpdateUserRequest) -> dict:
         "budget_duration" not in non_default_values
     ):  # applies internal user limits, if user role updated
         if is_internal_user and litellm.internal_user_budget_duration is not None:
-            non_default_values[
-                "budget_duration"
-            ] = litellm.internal_user_budget_duration
+            non_default_values["budget_duration"] = (
+                litellm.internal_user_budget_duration
+            )
             duration_s = duration_in_seconds(
                 duration=non_default_values["budget_duration"]
             )
@@ -964,14 +964,14 @@ async def get_users(
             "in": user_id_list,  # Now passing a list of strings as required by Prisma
         }
 
-    users: Optional[
-        List[LiteLLM_UserTable]
-    ] = await prisma_client.db.litellm_usertable.find_many(
-        where=where_conditions,
-        skip=skip,
-        take=page_size,
-        order={"created_at": "desc"},
-    )
+    users: Optional[List[LiteLLM_UserTable]] = (
+        await prisma_client.db.litellm_usertable.find_many(
+            where=where_conditions,
+            skip=skip,
+            take=page_size,
+            order={"created_at": "desc"},
+        )
+    )
 
     # Get total count of user rows
     total_count = await prisma_client.db.litellm_usertable.count(
@@ -1225,14 +1225,14 @@ async def ui_view_users(
         }
 
     # Query users with pagination and filters
-    users: Optional[
-        List[BaseModel]
-    ] = await prisma_client.db.litellm_usertable.find_many(
-        where=where_conditions,
-        skip=skip,
-        take=page_size,
-        order={"created_at": "desc"},
-    )
+    users: Optional[List[BaseModel]] = (
+        await prisma_client.db.litellm_usertable.find_many(
+            where=where_conditions,
+            skip=skip,
+            take=page_size,
+            order={"created_at": "desc"},
+        )
+    )
 
     if not users:
         return []
@@ -1258,6 +1258,8 @@ class SpendMetrics(BaseModel):
     spend: float = Field(default=0.0)
     prompt_tokens: int = Field(default=0)
     completion_tokens: int = Field(default=0)
+    cache_read_input_tokens: int = Field(default=0)
+    cache_creation_input_tokens: int = Field(default=0)
     total_tokens: int = Field(default=0)
     successful_requests: int = Field(default=0)
     failed_requests: int = Field(default=0)
@@ -1312,6 +1314,8 @@ class DailySpendMetadata(BaseModel):
     total_api_requests: int = Field(default=0)
     total_successful_requests: int = Field(default=0)
     total_failed_requests: int = Field(default=0)
+    total_cache_read_input_tokens: int = Field(default=0)
+    total_cache_creation_input_tokens: int = Field(default=0)
     page: int = Field(default=1)
     total_pages: int = Field(default=1)
     has_more: bool = Field(default=False)
@@ -1332,6 +1336,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
     custom_llm_provider: Optional[str] = None
     prompt_tokens: int = 0
     completion_tokens: int = 0
+    cache_read_input_tokens: int = 0
+    cache_creation_input_tokens: int = 0
     spend: float = 0.0
     api_requests: int = 0
     successful_requests: int = 0
@@ -1349,6 +1355,8 @@ def update_metrics(
     group_metrics.spend += record.spend
     group_metrics.prompt_tokens += record.prompt_tokens
     group_metrics.completion_tokens += record.completion_tokens
+    group_metrics.cache_read_input_tokens += record.cache_read_input_tokens
+    group_metrics.cache_creation_input_tokens += record.cache_creation_input_tokens
     group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
     group_metrics.api_requests += record.api_requests
     group_metrics.successful_requests += record.successful_requests
@@ -1448,6 +1456,8 @@ async def get_user_daily_activity(
     - spend
     - prompt_tokens
     - completion_tokens
+    - cache_read_input_tokens
+    - cache_creation_input_tokens
     - total_tokens
     - api_requests
     - breakdown by model, api_key, provider
@@ -1484,9 +1494,9 @@ async def get_user_daily_activity(
         user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
         and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
     ):
-        where_conditions[
-            "user_id"
-        ] = user_api_key_dict.user_id  # only allow access to own data
+        where_conditions["user_id"] = (
+            user_api_key_dict.user_id
+        )  # only allow access to own data
 
     # Get total count for pagination
     total_count = await prisma_client.db.litellm_dailyuserspend.count(
@@ -1560,6 +1570,10 @@ async def get_user_daily_activity(
             total_metrics.total_tokens += (
                 record.prompt_tokens + record.completion_tokens
             )
+            total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
+            total_metrics.cache_creation_input_tokens += (
+                record.cache_creation_input_tokens
+            )
             total_metrics.api_requests += record.api_requests
             total_metrics.successful_requests += record.successful_requests
             total_metrics.failed_requests += record.failed_requests
@@ -1587,6 +1601,8 @@ async def get_user_daily_activity(
             total_api_requests=total_metrics.api_requests,
             total_successful_requests=total_metrics.successful_requests,
             total_failed_requests=total_metrics.failed_requests,
+            total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
+            total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
             page=page,
             total_pages=-(-total_count // page_size),  # Ceiling division
            has_more=(page * page_size) < total_count,

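A small aside on the pagination math in the last hunk: `total_pages=-(-total_count // page_size)` is the integer ceiling-division idiom, equivalent to `math.ceil(total_count / page_size)` but exact for large ints and import-free. A quick check:

```python
def total_pages(total_count: int, page_size: int) -> int:
    # Ceiling division without floats: negate, floor-divide, negate back.
    return -(-total_count // page_size)


assert total_pages(197, 50) == 4  # 3 full pages + 1 partial page
assert total_pages(200, 50) == 4  # exact multiple: no extra page
assert total_pages(0, 50) == 0
```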
View file

@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int     @default(0)
   completion_tokens           Int     @default(0)
+  cache_read_input_tokens     Int     @default(0)
+  cache_creation_input_tokens Int     @default(0)
   spend                       Float   @default(0.0)
   api_requests                Int     @default(0)
   successful_requests         Int     @default(0)

View file

@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int     @default(0)
   completion_tokens           Int     @default(0)
+  cache_read_input_tokens     Int     @default(0)
+  cache_creation_input_tokens Int     @default(0)
   spend                       Float   @default(0.0)
   api_requests                Int     @default(0)
   successful_requests         Int     @default(0)

View file

@@ -204,6 +204,8 @@ async def test_get_aggregated_daily_spend_update_transactions_same_key():
         "api_requests": 2,  # 1 + 1
         "successful_requests": 2,  # 1 + 1
         "failed_requests": 0,  # 0 + 0
+        "cache_creation_input_tokens": 0,
+        "cache_read_input_tokens": 0,
     }
 
     updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
@@ -249,6 +251,8 @@ async def test_flush_and_get_aggregated_daily_spend_update_transactions(
         "api_requests": 2,  # 1 + 1
         "successful_requests": 2,  # 1 + 1
         "failed_requests": 0,  # 0 + 0
+        "cache_creation_input_tokens": 0,
+        "cache_read_input_tokens": 0,
     }
 
     # Add updates to queue
@@ -368,6 +372,48 @@ async def test_aggregate_queue_updates_accuracy(daily_spend_update_queue):
     assert daily_spend_update_transactions[test_key3]["failed_requests"] == 0
 
 
+@pytest.mark.asyncio
+async def test_cache_token_fields_aggregation(daily_spend_update_queue):
+    """Test that cache_read_input_tokens and cache_creation_input_tokens are handled and aggregated correctly."""
+    test_key = "user1_2023-01-01_key123_gpt-4_openai"
+    transaction1 = {
+        "spend": 1.0,
+        "prompt_tokens": 10,
+        "completion_tokens": 5,
+        "api_requests": 1,
+        "successful_requests": 1,
+        "failed_requests": 0,
+        "cache_read_input_tokens": 7,
+        "cache_creation_input_tokens": 3,
+    }
+    transaction2 = {
+        "spend": 2.0,
+        "prompt_tokens": 20,
+        "completion_tokens": 10,
+        "api_requests": 1,
+        "successful_requests": 1,
+        "failed_requests": 0,
+        "cache_read_input_tokens": 5,
+        "cache_creation_input_tokens": 4,
+    }
+
+    # Add both updates
+    await daily_spend_update_queue.add_update({test_key: transaction1})
+    await daily_spend_update_queue.add_update({test_key: transaction2})
+
+    # Aggregate
+    await daily_spend_update_queue.aggregate_queue_updates()
+    updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
+
+    assert len(updates) == 1
+    agg = updates[0][test_key]
+
+    assert agg["cache_read_input_tokens"] == 12  # 7 + 5
+    assert agg["cache_creation_input_tokens"] == 7  # 3 + 4
+    assert agg["spend"] == 3.0
+    assert agg["prompt_tokens"] == 30
+    assert agg["completion_tokens"] == 15
+    assert agg["api_requests"] == 2
+    assert agg["successful_requests"] == 2
+    assert agg["failed_requests"] == 0
+
+
 @pytest.mark.asyncio
 async def test_queue_size_reduction_with_large_volume(
     monkeypatch, daily_spend_update_queue

View file

@@ -79,6 +79,21 @@ const ModelSection = ({ modelName, metrics }: { modelName: string; metrics: ModelActivityData }) => {
           stack
         />
       </Card>
+      <Card>
+        <Title>Prompt Caching Metrics</Title>
+        <div className="mb-2">
+          <Text>Cache Read: {metrics.total_cache_read_input_tokens?.toLocaleString() || 0} tokens</Text>
+          <Text>Cache Creation: {metrics.total_cache_creation_input_tokens?.toLocaleString() || 0} tokens</Text>
+        </div>
+        <AreaChart
+          data={metrics.daily_data}
+          index="date"
+          categories={["metrics.cache_read_input_tokens", "metrics.cache_creation_input_tokens"]}
+          colors={["cyan", "purple"]}
+          valueFormatter={(number: number) => number.toLocaleString()}
+        />
+      </Card>
     </Grid>
   </div>
 );
@@ -97,6 +112,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
     total_successful_requests: 0,
     total_tokens: 0,
     total_spend: 0,
+    total_cache_read_input_tokens: 0,
+    total_cache_creation_input_tokens: 0,
     daily_data: {} as Record<string, {
       prompt_tokens: number;
       completion_tokens: number;
@@ -105,6 +122,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
       spend: number;
       successful_requests: number;
       failed_requests: number;
+      cache_read_input_tokens: number;
+      cache_creation_input_tokens: number;
     }>
   };
@@ -114,6 +133,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
     totalMetrics.total_successful_requests += model.total_successful_requests;
     totalMetrics.total_tokens += model.total_tokens;
     totalMetrics.total_spend += model.total_spend;
+    totalMetrics.total_cache_read_input_tokens += model.total_cache_read_input_tokens || 0;
+    totalMetrics.total_cache_creation_input_tokens += model.total_cache_creation_input_tokens || 0;
 
     // Aggregate daily data
     model.daily_data.forEach(day => {
@@ -125,7 +146,9 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
           api_requests: 0,
           spend: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         };
       }
       totalMetrics.daily_data[day.date].prompt_tokens += day.metrics.prompt_tokens;
@@ -135,6 +158,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }
       totalMetrics.daily_data[day.date].spend += day.metrics.spend;
       totalMetrics.daily_data[day.date].successful_requests += day.metrics.successful_requests;
       totalMetrics.daily_data[day.date].failed_requests += day.metrics.failed_requests;
+      totalMetrics.daily_data[day.date].cache_read_input_tokens += day.metrics.cache_read_input_tokens || 0;
+      totalMetrics.daily_data[day.date].cache_creation_input_tokens += day.metrics.cache_creation_input_tokens || 0;
     });
   });
@@ -233,6 +258,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
         prompt_tokens: 0,
         completion_tokens: 0,
         total_spend: 0,
+        total_cache_read_input_tokens: 0,
+        total_cache_creation_input_tokens: 0,
         daily_data: []
       };
     }
@@ -245,6 +272,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
       modelMetrics[model].total_spend += modelData.metrics.spend;
       modelMetrics[model].total_successful_requests += modelData.metrics.successful_requests;
       modelMetrics[model].total_failed_requests += modelData.metrics.failed_requests;
+      modelMetrics[model].total_cache_read_input_tokens += modelData.metrics.cache_read_input_tokens || 0;
+      modelMetrics[model].total_cache_creation_input_tokens += modelData.metrics.cache_creation_input_tokens || 0;
 
       // Add daily data
       modelMetrics[model].daily_data.push({
@@ -256,7 +285,9 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
           api_requests: modelData.metrics.api_requests,
           spend: modelData.metrics.spend,
           successful_requests: modelData.metrics.successful_requests,
-          failed_requests: modelData.metrics.failed_requests
+          failed_requests: modelData.metrics.failed_requests,
+          cache_read_input_tokens: modelData.metrics.cache_read_input_tokens || 0,
+          cache_creation_input_tokens: modelData.metrics.cache_creation_input_tokens || 0
         }
       });
     });

View file

@@ -62,7 +62,9 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
           total_tokens: 0,
           api_requests: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {}
       };
@@ -74,6 +76,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
       modelSpend[model].metrics.api_requests += metrics.metrics.api_requests;
       modelSpend[model].metrics.successful_requests += metrics.metrics.successful_requests || 0;
       modelSpend[model].metrics.failed_requests += metrics.metrics.failed_requests || 0;
+      modelSpend[model].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      modelSpend[model].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });
@@ -104,7 +108,9 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
           total_tokens: 0,
           api_requests: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {}
       };
@@ -116,6 +122,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
       providerSpend[provider].metrics.api_requests += metrics.metrics.api_requests;
       providerSpend[provider].metrics.successful_requests += metrics.metrics.successful_requests || 0;
       providerSpend[provider].metrics.failed_requests += metrics.metrics.failed_requests || 0;
+      providerSpend[provider].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      providerSpend[provider].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });
@@ -145,6 +153,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
           api_requests: 0,
           successful_requests: 0,
           failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {
           key_alias: metrics.metadata.key_alias
@@ -158,6 +168,8 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
       keySpend[key].metrics.api_requests += metrics.metrics.api_requests;
       keySpend[key].metrics.successful_requests += metrics.metrics.successful_requests;
       keySpend[key].metrics.failed_requests += metrics.metrics.failed_requests;
+      keySpend[key].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      keySpend[key].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });
@@ -325,7 +337,6 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({
             />
           </Card>
         </Col>
-
         {/* Top API Keys */}
         <Col numColSpan={1}>
           <Card className="h-full">

View file

@@ -6,6 +6,8 @@ export interface SpendMetrics {
   api_requests: number;
   successful_requests: number;
   failed_requests: number;
+  cache_read_input_tokens: number;
+  cache_creation_input_tokens: number;
 }
 
 export interface DailyData {
@@ -36,6 +38,8 @@ export interface ModelActivityData {
   total_requests: number;
   total_successful_requests: number;
   total_failed_requests: number;
+  total_cache_read_input_tokens: number;
+  total_cache_creation_input_tokens: number;
   total_tokens: number;
   prompt_tokens: number;
   completion_tokens: number;
@@ -50,6 +54,8 @@ export interface ModelActivityData {
     spend: number;
     successful_requests: number;
     failed_requests: number;
+    cache_read_input_tokens: number;
+    cache_creation_input_tokens: number;
   };
 }[];
} }