From bd88263b2993167b93ea9f714d2b41b68e2e00d0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 15 Apr 2025 21:40:57 -0700
Subject: [PATCH] [Feat - Cost Tracking improvement] Track prompt caching
 metrics in DailyUserSpendTransactions (#10029)

* stash changes

* emit cache read/write tokens to daily spend update

* emit cache read/write tokens on daily activity

* update types.ts

* docs prompt caching

* undo ui change

* fix activity metrics

* fix prompt caching metrics

* fix typed dict fields

* fix get_aggregated_daily_spend_update_transactions

* fix aggregating cache tokens

* test_cache_token_fields_aggregation

* daily_transaction

* add cache_creation_input_tokens and cache_read_input_tokens to LiteLLM_DailyUserSpend

* test_daily_spend_update_queue.py
---
 .../migration.sql                                 |  4 ++
 .../litellm_proxy_extras/schema.prisma            |  2 +
 litellm/proxy/_types.py                           |  6 ++
 litellm/proxy/db/db_spend_update_writer.py        | 26 ++++
 .../daily_spend_update_queue.py                   | 24 ++++---
 .../internal_user_endpoints.py                    | 62 ++++++++++++-------
 litellm/proxy/schema.prisma                       |  2 +
 schema.prisma                                     |  2 +
 .../test_daily_spend_update_queue.py              | 46 ++++++++++++++
 .../src/components/activity_metrics.tsx           | 35 ++++++++++-
 .../src/components/new_usage.tsx                  | 17 ++++-
 .../src/components/usage/types.ts                 |  6 ++
 12 files changed, 197 insertions(+), 35 deletions(-)
 create mode 100644 litellm-proxy-extras/litellm_proxy_extras/migrations/20250415151647_add_cache_read_write_tokens_daily_spend_transactions/migration.sql

diff --git a/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415151647_add_cache_read_write_tokens_daily_spend_transactions/migration.sql b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415151647_add_cache_read_write_tokens_daily_spend_transactions/migration.sql
new file mode 100644
index 0000000000..f47e1c2e91
--- /dev/null
+++ b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415151647_add_cache_read_write_tokens_daily_spend_transactions/migration.sql
@@ -0,0 +1,4 @@
+-- AlterTable
+ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "cache_creation_input_tokens" INTEGER NOT NULL DEFAULT 0,
+ADD COLUMN "cache_read_input_tokens" INTEGER NOT NULL DEFAULT 0;
+
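For context, the two new columns mirror the prompt-caching fields that some providers report in a response's usage block (Anthropic, for example, returns cache_creation_input_tokens and cache_read_input_tokens). A representative usage object, with purely illustrative values, looks like this:

    # Illustrative usage block from a prompt-cached request (values made up).
    # cache_creation_input_tokens: tokens written into the prompt cache
    # cache_read_input_tokens:     tokens served back from the prompt cache
    usage_object = {
        "prompt_tokens": 2048,
        "completion_tokens": 256,
        "total_tokens": 2304,
        "cache_creation_input_tokens": 1024,
        "cache_read_input_tokens": 896,
    }

Because both columns default to 0, existing LiteLLM_DailyUserSpend rows need no backfill.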
diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
index b2a6b362cf..b470eba64e 100644
--- a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
+++ b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 491c64611f..d72ce2cb3b 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -2777,8 +2777,14 @@ class BaseDailySpendTransaction(TypedDict):
     model: str
     model_group: Optional[str]
     custom_llm_provider: Optional[str]
+
+    # token count metrics
     prompt_tokens: int
     completion_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+
+    # request level metrics
     spend: float
     api_requests: int
     successful_requests: int
diff --git a/litellm/proxy/db/db_spend_update_writer.py b/litellm/proxy/db/db_spend_update_writer.py
index 4247553c30..e6bc0c3b7a 100644
--- a/litellm/proxy/db/db_spend_update_writer.py
+++ b/litellm/proxy/db/db_spend_update_writer.py
@@ -6,6 +6,7 @@ Module responsible for
 """

 import asyncio
+import json
 import os
 import time
 import traceback
@@ -24,6 +25,7 @@ from litellm.proxy._types import (
     DBSpendUpdateTransactions,
     Litellm_EntityType,
     LiteLLM_UserTable,
+    SpendLogsMetadata,
     SpendLogsPayload,
     SpendUpdateQueueItem,
 )
@@ -806,6 +808,12 @@ class DBSpendUpdateWriter:
                             "completion_tokens": transaction[
                                 "completion_tokens"
                             ],
+                            "cache_read_input_tokens": transaction.get(
+                                "cache_read_input_tokens", 0
+                            ),
+                            "cache_creation_input_tokens": transaction.get(
+                                "cache_creation_input_tokens", 0
+                            ),
                             "spend": transaction["spend"],
                             "api_requests": transaction["api_requests"],
                             "successful_requests": transaction[
@@ -824,6 +832,16 @@ class DBSpendUpdateWriter:
                                     "completion_tokens"
                                 ]
                             },
+                            "cache_read_input_tokens": {
+                                "increment": transaction.get(
+                                    "cache_read_input_tokens", 0
+                                )
+                            },
+                            "cache_creation_input_tokens": {
+                                "increment": transaction.get(
+                                    "cache_creation_input_tokens", 0
+                                )
+                            },
                             "spend": {"increment": transaction["spend"]},
                             "api_requests": {
                                 "increment": transaction["api_requests"]
@@ -1024,6 +1042,8 @@ class DBSpendUpdateWriter:
             request_status = prisma_client.get_request_status(payload)
             verbose_proxy_logger.info(f"Logged request status: {request_status}")
+            _metadata: SpendLogsMetadata = json.loads(payload["metadata"])
+            usage_obj = _metadata.get("usage_object", {}) or {}
             if isinstance(payload["startTime"], datetime):
                 start_time = payload["startTime"].isoformat()
                 date = start_time.split("T")[0]
@@ -1047,6 +1067,12 @@ class DBSpendUpdateWriter:
                 api_requests=1,
                 successful_requests=1 if request_status == "success" else 0,
                 failed_requests=1 if request_status != "success" else 0,
+                cache_read_input_tokens=usage_obj.get("cache_read_input_tokens", 0)
+                or 0,
+                cache_creation_input_tokens=usage_obj.get(
+                    "cache_creation_input_tokens", 0
+                )
+                or 0,
             )
             return daily_transaction
         except Exception as e:
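To make the writer change concrete, here is a minimal runnable sketch of how a daily spend transaction now picks up the cache fields from a spend-log row. The payload literal is illustrative, and the real code builds a full BaseDailySpendTransaction rather than a bare dict:

    import json

    # The spend log's "metadata" column is a JSON string whose "usage_object"
    # carries the provider-reported usage (see DBSpendUpdateWriter above).
    payload = {
        "metadata": json.dumps(
            {
                "usage_object": {
                    "cache_read_input_tokens": 896,
                    "cache_creation_input_tokens": 1024,
                }
            }
        )
    }

    _metadata = json.loads(payload["metadata"])
    usage_obj = _metadata.get("usage_object", {}) or {}

    daily_transaction_fields = {
        # "or 0" guards against an explicit None inside the usage object
        "cache_read_input_tokens": usage_obj.get("cache_read_input_tokens", 0) or 0,
        "cache_creation_input_tokens": usage_obj.get("cache_creation_input_tokens", 0)
        or 0,
    }
    print(daily_transaction_fields)
    # -> {'cache_read_input_tokens': 896, 'cache_creation_input_tokens': 1024}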
diff --git a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
index c92b4d5ae7..c3074e641b 100644
--- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
+++ b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
@@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):

     def __init__(self):
         super().__init__()
-        self.update_queue: asyncio.Queue[
-            Dict[str, BaseDailySpendTransaction]
-        ] = asyncio.Queue()
+        self.update_queue: asyncio.Queue[Dict[str, BaseDailySpendTransaction]] = (
+            asyncio.Queue()
+        )

     async def add_update(self, update: Dict[str, BaseDailySpendTransaction]):
         """Enqueue an update."""
@@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
         Combine all updates in the queue into a single update.
         This is used to reduce the size of the in-memory queue.
         """
-        updates: List[
-            Dict[str, BaseDailySpendTransaction]
-        ] = await self.flush_all_updates_from_in_memory_queue()
+        updates: List[Dict[str, BaseDailySpendTransaction]] = (
+            await self.flush_all_updates_from_in_memory_queue()
+        )
         aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
             updates
         )
@@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):

     @staticmethod
     def get_aggregated_daily_spend_update_transactions(
-        updates: List[Dict[str, BaseDailySpendTransaction]]
+        updates: List[Dict[str, BaseDailySpendTransaction]],
     ) -> Dict[str, BaseDailySpendTransaction]:
         """Aggregate updates by daily_transaction_key."""
         aggregated_daily_spend_update_transactions: Dict[
@@ -115,6 +115,16 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
                         "successful_requests"
                     ]
                     daily_transaction["failed_requests"] += payload["failed_requests"]
+
+                    # Add optional metrics cache_read_input_tokens and cache_creation_input_tokens
+                    daily_transaction["cache_read_input_tokens"] = (
+                        payload.get("cache_read_input_tokens", 0) or 0
+                    ) + daily_transaction.get("cache_read_input_tokens", 0)
+
+                    daily_transaction["cache_creation_input_tokens"] = (
+                        payload.get("cache_creation_input_tokens", 0) or 0
+                    ) + daily_transaction.get("cache_creation_input_tokens", 0)
+
                 else:
                     aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
         return aggregated_daily_spend_update_transactions
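The aggregation above reduces to a per-key sum, with the two cache fields treated as optional so older queued transactions that lack them still merge cleanly. A simplified, dict-based sketch of the same rule (the production method works on BaseDailySpendTransaction TypedDicts, not plain dicts):

    from copy import deepcopy

    SUMMED_FIELDS = [
        "spend", "prompt_tokens", "completion_tokens", "api_requests",
        "successful_requests", "failed_requests",
        "cache_read_input_tokens", "cache_creation_input_tokens",
    ]

    def aggregate(updates):
        # Merge updates sharing a daily_transaction_key by summing each metric.
        out = {}
        for key_updates in updates:
            for key, payload in key_updates.items():
                if key not in out:
                    out[key] = deepcopy(payload)
                    continue
                for field in SUMMED_FIELDS:
                    # "or 0" keeps a missing/None cache field from breaking the sum
                    out[key][field] = (out[key].get(field, 0) or 0) + (
                        payload.get(field, 0) or 0
                    )
        return out

    key = "user1_2023-01-01_key123_gpt-4_openai"
    t1 = {"spend": 1.0, "cache_read_input_tokens": 7, "cache_creation_input_tokens": 3}
    t2 = {"spend": 2.0, "cache_read_input_tokens": 5, "cache_creation_input_tokens": 4}
    agg = aggregate([{key: t1}, {key: t2}])
    assert agg[key]["spend"] == 3.0
    assert agg[key]["cache_read_input_tokens"] == 12
    assert agg[key]["cache_creation_input_tokens"] == 7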
diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py
index efc1bafa15..4a8f4edea3 100644
--- a/litellm/proxy/management_endpoints/internal_user_endpoints.py
+++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py
@@ -82,9 +82,9 @@ def _update_internal_new_user_params(data_json: dict, data: NewUserRequest) -> dict:
         data_json["user_id"] = str(uuid.uuid4())
     auto_create_key = data_json.pop("auto_create_key", True)
     if auto_create_key is False:
-        data_json[
-            "table_name"
-        ] = "user"  # only create a user, don't create key if 'auto_create_key' set to False
+        data_json["table_name"] = (
+            "user"  # only create a user, don't create key if 'auto_create_key' set to False
+        )

     is_internal_user = False
     if data.user_role and data.user_role.is_internal_user_role:
@@ -651,9 +651,9 @@ def _update_internal_user_params(data_json: dict, data: UpdateUserRequest) -> dict:
         "budget_duration" not in non_default_values
     ):  # applies internal user limits, if user role updated
         if is_internal_user and litellm.internal_user_budget_duration is not None:
-            non_default_values[
-                "budget_duration"
-            ] = litellm.internal_user_budget_duration
+            non_default_values["budget_duration"] = (
+                litellm.internal_user_budget_duration
+            )
             duration_s = duration_in_seconds(
                 duration=non_default_values["budget_duration"]
             )
@@ -964,13 +964,13 @@ async def get_users(
             "in": user_id_list,  # Now passing a list of strings as required by Prisma
         }

-    users: Optional[
-        List[LiteLLM_UserTable]
-    ] = await prisma_client.db.litellm_usertable.find_many(
-        where=where_conditions,
-        skip=skip,
-        take=page_size,
-        order={"created_at": "desc"},
+    users: Optional[List[LiteLLM_UserTable]] = (
+        await prisma_client.db.litellm_usertable.find_many(
+            where=where_conditions,
+            skip=skip,
+            take=page_size,
+            order={"created_at": "desc"},
+        )
     )

     # Get total count of user rows
@@ -1225,13 +1225,13 @@ async def ui_view_users(
         }

     # Query users with pagination and filters
-    users: Optional[
-        List[BaseModel]
-    ] = await prisma_client.db.litellm_usertable.find_many(
-        where=where_conditions,
-        skip=skip,
-        take=page_size,
-        order={"created_at": "desc"},
+    users: Optional[List[BaseModel]] = (
+        await prisma_client.db.litellm_usertable.find_many(
+            where=where_conditions,
+            skip=skip,
+            take=page_size,
+            order={"created_at": "desc"},
+        )
     )

     if not users:
@@ -1258,6 +1258,8 @@ class SpendMetrics(BaseModel):
     spend: float = Field(default=0.0)
     prompt_tokens: int = Field(default=0)
     completion_tokens: int = Field(default=0)
+    cache_read_input_tokens: int = Field(default=0)
+    cache_creation_input_tokens: int = Field(default=0)
     total_tokens: int = Field(default=0)
     successful_requests: int = Field(default=0)
     failed_requests: int = Field(default=0)
@@ -1312,6 +1314,8 @@ class DailySpendMetadata(BaseModel):
     total_api_requests: int = Field(default=0)
     total_successful_requests: int = Field(default=0)
     total_failed_requests: int = Field(default=0)
+    total_cache_read_input_tokens: int = Field(default=0)
+    total_cache_creation_input_tokens: int = Field(default=0)
     page: int = Field(default=1)
     total_pages: int = Field(default=1)
     has_more: bool = Field(default=False)
@@ -1332,6 +1336,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
     custom_llm_provider: Optional[str] = None
     prompt_tokens: int = 0
     completion_tokens: int = 0
+    cache_read_input_tokens: int = 0
+    cache_creation_input_tokens: int = 0
     spend: float = 0.0
     api_requests: int = 0
     successful_requests: int = 0
@@ -1349,6 +1355,8 @@ def update_metrics(
     group_metrics.spend += record.spend
     group_metrics.prompt_tokens += record.prompt_tokens
     group_metrics.completion_tokens += record.completion_tokens
+    group_metrics.cache_read_input_tokens += record.cache_read_input_tokens
+    group_metrics.cache_creation_input_tokens += record.cache_creation_input_tokens
     group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
     group_metrics.api_requests += record.api_requests
     group_metrics.successful_requests += record.successful_requests
@@ -1448,6 +1456,8 @@ async def get_user_daily_activity(
     - spend
     - prompt_tokens
     - completion_tokens
+    - cache_read_input_tokens
+    - cache_creation_input_tokens
     - total_tokens
     - api_requests
     - breakdown by model, api_key, provider
@@ -1484,9 +1494,9 @@ async def get_user_daily_activity(
             user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
             and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
         ):
-            where_conditions[
-                "user_id"
-            ] = user_api_key_dict.user_id  # only allow access to own data
+            where_conditions["user_id"] = (
+                user_api_key_dict.user_id
+            )  # only allow access to own data

         # Get total count for pagination
         total_count = await prisma_client.db.litellm_dailyuserspend.count(
@@ -1560,6 +1570,10 @@ async def get_user_daily_activity(
                 total_metrics.total_tokens += (
                     record.prompt_tokens + record.completion_tokens
                 )
+                total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
+                total_metrics.cache_creation_input_tokens += (
+                    record.cache_creation_input_tokens
+                )
                 total_metrics.api_requests += record.api_requests
                 total_metrics.successful_requests += record.successful_requests
                 total_metrics.failed_requests += record.failed_requests
@@ -1587,6 +1601,8 @@ async def get_user_daily_activity(
                 total_api_requests=total_metrics.api_requests,
                 total_successful_requests=total_metrics.successful_requests,
                 total_failed_requests=total_metrics.failed_requests,
+                total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
+                total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
                 page=page,
                 total_pages=-(-total_count // page_size),  # Ceiling division
                 has_more=(page * page_size) < total_count,
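With SpendMetrics and DailySpendMetadata extended, the daily-activity endpoint now surfaces the cache counters both per day and in the grand totals. A rough sketch of the relevant slice of a response, with illustrative values:

    # Rough shape of the enriched daily-activity response (values illustrative).
    response_excerpt = {
        "results": [
            {
                "date": "2025-04-15",
                "metrics": {
                    "spend": 3.0,
                    "prompt_tokens": 30,
                    "completion_tokens": 15,
                    "cache_read_input_tokens": 12,
                    "cache_creation_input_tokens": 7,
                    "total_tokens": 45,
                    "api_requests": 2,
                    "successful_requests": 2,
                    "failed_requests": 0,
                },
            }
        ],
        "metadata": {
            "total_spend": 3.0,
            "total_cache_read_input_tokens": 12,
            "total_cache_creation_input_tokens": 7,
            "page": 1,
            # total_pages uses -(-total_count // page_size), Python's idiom for
            # ceiling division: e.g. -(-7 // 3) == 3
            "total_pages": 1,
            "has_more": False,
        },
    }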
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2a0f791e25..845f05f14b 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)
diff --git a/schema.prisma b/schema.prisma
index 2a0f791e25..845f05f14b 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)
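Both schema.prisma copies gain the same two columns as the migration, again defaulting to 0. Once the client is regenerated, the fields read back like any other integer column. A hedged sketch in the async accessor style used by get_user_daily_activity; cache_hit_summary and its where-filter are hypothetical helpers, not part of this patch:

    async def cache_hit_summary(prisma_client, user_id: str) -> dict:
        # Sum prompt-cache reads/writes across one user's daily spend rows.
        rows = await prisma_client.db.litellm_dailyuserspend.find_many(
            where={"user_id": user_id},
        )
        return {
            "cache_read_input_tokens": sum(r.cache_read_input_tokens for r in rows),
            "cache_creation_input_tokens": sum(
                r.cache_creation_input_tokens for r in rows
            ),
        }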
assert agg["spend"] == 3.0 + assert agg["prompt_tokens"] == 30 + assert agg["completion_tokens"] == 15 + assert agg["api_requests"] == 2 + assert agg["successful_requests"] == 2 + assert agg["failed_requests"] == 0 + + @pytest.mark.asyncio async def test_queue_size_reduction_with_large_volume( monkeypatch, daily_spend_update_queue diff --git a/ui/litellm-dashboard/src/components/activity_metrics.tsx b/ui/litellm-dashboard/src/components/activity_metrics.tsx index 1c999531c3..df54561b16 100644 --- a/ui/litellm-dashboard/src/components/activity_metrics.tsx +++ b/ui/litellm-dashboard/src/components/activity_metrics.tsx @@ -79,6 +79,21 @@ const ModelSection = ({ modelName, metrics }: { modelName: string; metrics: Mode stack /> + + + Prompt Caching Metrics +
diff --git a/ui/litellm-dashboard/src/components/activity_metrics.tsx b/ui/litellm-dashboard/src/components/activity_metrics.tsx
index 1c999531c3..df54561b16 100644
--- a/ui/litellm-dashboard/src/components/activity_metrics.tsx
+++ b/ui/litellm-dashboard/src/components/activity_metrics.tsx
@@ -79,6 +79,21 @@ const ModelSection = ({ modelName, metrics }: { modelName: string; metrics: ModelActivityData }) => (
         stack
       />
     </Card>
+
+    <Card className="mt-4">
+      <Title>Prompt Caching Metrics</Title>
+      <div className="flex space-x-4 text-sm text-gray-500">
+        <span>Cache Read: {metrics.total_cache_read_input_tokens?.toLocaleString() || 0} tokens</span>
+        <span>Cache Creation: {metrics.total_cache_creation_input_tokens?.toLocaleString() || 0} tokens</span>
+      </div>
+      <AreaChart
+        data={metrics.daily_data}
+        index="date"
+        categories={["metrics.cache_read_input_tokens", "metrics.cache_creation_input_tokens"]}
+        colors={["cyan", "purple"]}
+        valueFormatter={(number: number) => number.toLocaleString()}
+      />
+    </Card>
   </div>
 );
@@ -97,6 +112,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }) => {
     total_successful_requests: 0,
     total_tokens: 0,
     total_spend: 0,
+    total_cache_read_input_tokens: 0,
+    total_cache_creation_input_tokens: 0,
     daily_data: {} as Record<string, {
@@ -108,6 +125,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }) => {
       spend: number;
       successful_requests: number;
       failed_requests: number;
+      cache_read_input_tokens: number;
+      cache_creation_input_tokens: number;
     }>
   };
@@ -114,6 +133,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }) => {
     totalMetrics.total_successful_requests += model.total_successful_requests;
     totalMetrics.total_tokens += model.total_tokens;
     totalMetrics.total_spend += model.total_spend;
+    totalMetrics.total_cache_read_input_tokens += model.total_cache_read_input_tokens || 0;
+    totalMetrics.total_cache_creation_input_tokens += model.total_cache_creation_input_tokens || 0;

     // Aggregate daily data
     model.daily_data.forEach(day => {
@@ -125,7 +146,9 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }) => {
           api_requests: 0,
           spend: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         };
       }
       totalMetrics.daily_data[day.date].prompt_tokens += day.metrics.prompt_tokens;
@@ -135,6 +158,8 @@ export const ActivityMetrics: React.FC<ActivityMetricsProps> = ({ modelMetrics }) => {
       totalMetrics.daily_data[day.date].spend += day.metrics.spend;
       totalMetrics.daily_data[day.date].successful_requests += day.metrics.successful_requests;
       totalMetrics.daily_data[day.date].failed_requests += day.metrics.failed_requests;
+      totalMetrics.daily_data[day.date].cache_read_input_tokens += day.metrics.cache_read_input_tokens || 0;
+      totalMetrics.daily_data[day.date].cache_creation_input_tokens += day.metrics.cache_creation_input_tokens || 0;
     });
   });

@@ -233,6 +258,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
         prompt_tokens: 0,
         completion_tokens: 0,
         total_spend: 0,
+        total_cache_read_input_tokens: 0,
+        total_cache_creation_input_tokens: 0,
         daily_data: []
       };
     }
@@ -245,6 +272,8 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
     modelMetrics[model].total_spend += modelData.metrics.spend;
     modelMetrics[model].total_successful_requests += modelData.metrics.successful_requests;
     modelMetrics[model].total_failed_requests += modelData.metrics.failed_requests;
+    modelMetrics[model].total_cache_read_input_tokens += modelData.metrics.cache_read_input_tokens || 0;
+    modelMetrics[model].total_cache_creation_input_tokens += modelData.metrics.cache_creation_input_tokens || 0;

     // Add daily data
     modelMetrics[model].daily_data.push({
@@ -256,7 +285,9 @@ export const processActivityData = (dailyActivity: { results: DailyData[] }): Record<string, ModelActivityData> => {
         api_requests: modelData.metrics.api_requests,
         spend: modelData.metrics.spend,
         successful_requests: modelData.metrics.successful_requests,
-        failed_requests: modelData.metrics.failed_requests
+        failed_requests: modelData.metrics.failed_requests,
+        cache_read_input_tokens: modelData.metrics.cache_read_input_tokens || 0,
+        cache_creation_input_tokens: modelData.metrics.cache_creation_input_tokens || 0
       }
     });
   });
diff --git a/ui/litellm-dashboard/src/components/new_usage.tsx b/ui/litellm-dashboard/src/components/new_usage.tsx
index 7ea0f2f8e8..43e4fe3ef6 100644
--- a/ui/litellm-dashboard/src/components/new_usage.tsx
+++ b/ui/litellm-dashboard/src/components/new_usage.tsx
@@ -62,7 +62,9 @@ const NewUsagePage: React.FC = ({
           total_tokens: 0,
           api_requests: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {}
       };
@@ -74,6 +76,8 @@ const NewUsagePage: React.FC = ({
       modelSpend[model].metrics.api_requests += metrics.metrics.api_requests;
       modelSpend[model].metrics.successful_requests += metrics.metrics.successful_requests || 0;
       modelSpend[model].metrics.failed_requests += metrics.metrics.failed_requests || 0;
+      modelSpend[model].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      modelSpend[model].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });

@@ -104,7 +108,9 @@ const NewUsagePage: React.FC = ({
           total_tokens: 0,
           api_requests: 0,
           successful_requests: 0,
-          failed_requests: 0
+          failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {}
      };
@@ -116,6 +122,8 @@ const NewUsagePage: React.FC = ({
       providerSpend[provider].metrics.api_requests += metrics.metrics.api_requests;
       providerSpend[provider].metrics.successful_requests += metrics.metrics.successful_requests || 0;
       providerSpend[provider].metrics.failed_requests += metrics.metrics.failed_requests || 0;
+      providerSpend[provider].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      providerSpend[provider].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });

@@ -145,6 +153,8 @@ const NewUsagePage: React.FC = ({
           api_requests: 0,
           successful_requests: 0,
           failed_requests: 0,
+          cache_read_input_tokens: 0,
+          cache_creation_input_tokens: 0
         },
         metadata: {
           key_alias: metrics.metadata.key_alias
@@ -158,6 +168,8 @@ const NewUsagePage: React.FC = ({
       keySpend[key].metrics.api_requests += metrics.metrics.api_requests;
       keySpend[key].metrics.successful_requests += metrics.metrics.successful_requests;
       keySpend[key].metrics.failed_requests += metrics.metrics.failed_requests;
+      keySpend[key].metrics.cache_read_input_tokens += metrics.metrics.cache_read_input_tokens || 0;
+      keySpend[key].metrics.cache_creation_input_tokens += metrics.metrics.cache_creation_input_tokens || 0;
     });
   });

@@ -325,7 +337,6 @@ const NewUsagePage: React.FC = ({
             />
-
           {/* Top API Keys */}
diff --git a/ui/litellm-dashboard/src/components/usage/types.ts b/ui/litellm-dashboard/src/components/usage/types.ts
index 6d81ffc5c9..fbc893b7bd 100644
--- a/ui/litellm-dashboard/src/components/usage/types.ts
+++ b/ui/litellm-dashboard/src/components/usage/types.ts
@@ -6,6 +6,8 @@ export interface SpendMetrics {
   api_requests: number;
   successful_requests: number;
   failed_requests: number;
+  cache_read_input_tokens: number;
+  cache_creation_input_tokens: number;
 }

 export interface DailyData {
@@ -36,6 +38,8 @@ export interface ModelActivityData {
   total_requests: number;
   total_successful_requests: number;
   total_failed_requests: number;
+  total_cache_read_input_tokens: number;
+  total_cache_creation_input_tokens: number;
   total_tokens: number;
   prompt_tokens: number;
   completion_tokens: number;
@@ -50,6 +54,8 @@ export interface ModelActivityData {
     spend: number;
     successful_requests: number;
     failed_requests: number;
+    cache_read_input_tokens: number;
+    cache_creation_input_tokens: number;
   };
 }[];
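Taken together, the patch makes prompt-caching activity visible end to end: the writer parses the usage object out of each spend log, the queue aggregates the counters per daily transaction key, the upsert persists them, and the API and dashboard report them. As a closing usage sketch, the endpoint path, port, query parameters, and key below are assumptions based on a default proxy setup, not details taken from this diff:

    import requests

    # Query the user daily activity endpoint and read the new cache totals.
    resp = requests.get(
        "http://localhost:4000/user/daily/activity",
        headers={"Authorization": "Bearer sk-..."},
        params={"start_date": "2025-04-01", "end_date": "2025-04-15"},
    )
    data = resp.json()
    print(data["metadata"]["total_cache_read_input_tokens"])
    print(data["metadata"]["total_cache_creation_input_tokens"])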