diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
index b2a6b362cf..b470eba64e 100644
--- a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
+++ b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index e0bdfdb649..e25b52e2b8 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -650,9 +650,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
     allowed_cache_controls: Optional[list] = []
     config: Optional[dict] = {}
     permissions: Optional[dict] = {}
-    model_max_budget: Optional[
-        dict
-    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[dict] = (
+        {}
+    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
 
     model_config = ConfigDict(protected_namespaces=())
     model_rpm_limit: Optional[dict] = None
@@ -908,12 +908,12 @@ class NewCustomerRequest(BudgetNewRequest):
     alias: Optional[str] = None  # human-friendly alias
     blocked: bool = False  # allow/disallow requests for this end-user
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )
 
     @model_validator(mode="before")
     @classmethod
@@ -935,12 +935,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
     blocked: bool = False  # allow/disallow requests for this end-user
     max_budget: Optional[float] = None
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )
 
 
 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@@ -1076,9 +1076,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
 
 class AddTeamCallback(LiteLLMPydanticObjectBase):
     callback_name: str
-    callback_type: Optional[
-        Literal["success", "failure", "success_and_failure"]
-    ] = "success_and_failure"
+    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
+        "success_and_failure"
+    )
     callback_vars: Dict[str, str]
 
     @model_validator(mode="before")
@@ -1335,9 +1335,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
     stored_in_db: Optional[bool]
     field_default_value: Any
     premium_field: bool = False
-    nested_fields: Optional[
-        List[FieldDetail]
-    ] = None  # For nested dictionary or Pydantic fields
+    nested_fields: Optional[List[FieldDetail]] = (
+        None  # For nested dictionary or Pydantic fields
+    )
 
 
 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@@ -1604,9 +1604,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = None
     created_at: datetime
     updated_at: datetime
-    user: Optional[
-        Any
-    ] = None  # You might want to replace 'Any' with a more specific type if available
+    user: Optional[Any] = (
+        None  # You might want to replace 'Any' with a more specific type if available
+    )
     litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
 
     model_config = ConfigDict(protected_namespaces=())
@@ -2352,9 +2352,9 @@ class TeamModelDeleteRequest(BaseModel):
 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
     organization_id: str
-    max_budget_in_organization: Optional[
-        float
-    ] = None  # Users max budget within the organization
+    max_budget_in_organization: Optional[float] = (
+        None  # Users max budget within the organization
+    )
 
 
 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@@ -2543,9 +2543,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
     Maps provider names to their budget configs.
     """
 
-    providers: Dict[
-        str, ProviderBudgetResponseObject
-    ] = {}  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[str, ProviderBudgetResponseObject] = (
+        {}
+    )  # Dictionary mapping provider names to their budget configurations
 
 
 class ProxyStateVariables(TypedDict):
@@ -2673,9 +2673,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
     enforce_rbac: bool = False
     roles_jwt_field: Optional[str] = None  # v2 on role mappings
     role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[
-        str
-    ] = None  # can be either user / team, inferred from the role mapping
+    object_id_jwt_field: Optional[str] = (
+        None  # can be either user / team, inferred from the role mapping
+    )
     scope_mappings: Optional[List[ScopeMapping]] = None
     enforce_scope_based_access: bool = False
     enforce_team_based_model_access: bool = False
@@ -2776,8 +2776,14 @@ class DailyUserSpendTransaction(TypedDict):
     model: str
     model_group: Optional[str]
     custom_llm_provider: Optional[str]
+
+    # token count metrics
     prompt_tokens: int
     completion_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+
+    # request level metrics
     spend: float
     api_requests: int
     successful_requests: int
diff --git a/litellm/proxy/db/db_spend_update_writer.py b/litellm/proxy/db/db_spend_update_writer.py
index 6d88b3fc46..a9dfc9723e 100644
--- a/litellm/proxy/db/db_spend_update_writer.py
+++ b/litellm/proxy/db/db_spend_update_writer.py
@@ -6,6 +6,7 @@ Module responsible for
 """
 
 import asyncio
+import json
 import os
 import time
 import traceback
@@ -22,6 +23,7 @@ from litellm.proxy._types import (
     DBSpendUpdateTransactions,
     Litellm_EntityType,
     LiteLLM_UserTable,
+    SpendLogsMetadata,
     SpendLogsPayload,
     SpendUpdateQueueItem,
 )
@@ -862,6 +864,13 @@ class DBSpendUpdateWriter:
         request_status = prisma_client.get_request_status(payload)
         verbose_proxy_logger.info(f"Logged request status: {request_status}")
 
+        _metadata: SpendLogsMetadata = json.loads(payload["metadata"])
+        usage_obj = _metadata.get("usage_object", {}) or {}
+        cache_read_input_tokens = usage_obj.get("cache_read_input_tokens", 0) or 0
+        cache_creation_input_tokens = (
+            usage_obj.get("cache_creation_input_tokens", 0) or 0
+        )
+
         if isinstance(payload["startTime"], datetime):
             start_time = payload["startTime"].isoformat()
             date = start_time.split("T")[0]
@@ -887,6 +896,8 @@ class DBSpendUpdateWriter:
                 api_requests=1,
                 successful_requests=1 if request_status == "success" else 0,
                 failed_requests=1 if request_status != "success" else 0,
+                cache_read_input_tokens=cache_read_input_tokens,
+                cache_creation_input_tokens=cache_creation_input_tokens,
             )
 
             await self.daily_spend_update_queue.add_update(
diff --git a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
index c61d24d50e..9c64105e74 100644
--- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
+++ b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
@@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
 
     def __init__(self):
         super().__init__()
-        self.update_queue: asyncio.Queue[
-            Dict[str, DailyUserSpendTransaction]
-        ] = asyncio.Queue()
+        self.update_queue: asyncio.Queue[Dict[str, DailyUserSpendTransaction]] = (
+            asyncio.Queue()
+        )
 
     async def add_update(self, update: Dict[str, DailyUserSpendTransaction]):
         """Enqueue an update."""
@@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
         Combine all updates in the queue into a single update.
         This is used to reduce the size of the in-memory queue.
         """
-        updates: List[
-            Dict[str, DailyUserSpendTransaction]
-        ] = await self.flush_all_updates_from_in_memory_queue()
+        updates: List[Dict[str, DailyUserSpendTransaction]] = (
+            await self.flush_all_updates_from_in_memory_queue()
+        )
         aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
             updates
         )
@@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
 
     @staticmethod
     def get_aggregated_daily_spend_update_transactions(
-        updates: List[Dict[str, DailyUserSpendTransaction]]
+        updates: List[Dict[str, DailyUserSpendTransaction]],
    ) -> Dict[str, DailyUserSpendTransaction]:
         """Aggregate updates by daily_transaction_key."""
         aggregated_daily_spend_update_transactions: Dict[
@@ -113,6 +113,12 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
                     daily_transaction["completion_tokens"] += payload[
                         "completion_tokens"
                     ]
+                    daily_transaction["cache_read_input_tokens"] += payload[
+                        "cache_read_input_tokens"
+                    ]
+                    daily_transaction["cache_creation_input_tokens"] += payload[
+                        "cache_creation_input_tokens"
+                    ]
                     daily_transaction["api_requests"] += payload["api_requests"]
                     daily_transaction["successful_requests"] += payload[
                         "successful_requests"
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index b2a6b362cf..b470eba64e 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index d15c66ab98..be7a234123 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1713,6 +1713,8 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
     mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
     applied_guardrails: Optional[List[str]]
     usage_object: Optional[dict]
+    cache_read_input_tokens: Optional[int]
+    cache_creation_input_tokens: Optional[int]
 
 
 class StandardLoggingAdditionalHeaders(TypedDict, total=False):
diff --git a/schema.prisma b/schema.prisma
index b2a6b362cf..b470eba64e 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
   custom_llm_provider         String?
   prompt_tokens               Int      @default(0)
   completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
   spend                       Float    @default(0.0)
   api_requests                Int      @default(0)
   successful_requests         Int      @default(0)