stash changes

2025-04-27 11:43:54 +00:00 · 2025-04-15 13:27:35 -07:00 · 2025-04-15 13:27:35 -07:00 · 09e13ab75b
commit 09e13ab75b
parent 14bcc9a6c9
7 changed files with 71 additions and 40 deletions
--- a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
+++ b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
  custom_llm_provider String?  
  prompt_tokens       Int      @default(0)
  completion_tokens   Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
  spend               Float    @default(0.0)
  api_requests        Int      @default(0)
  successful_requests Int      @default(0)
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -650,9 +650,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
    allowed_cache_controls: Optional[list] = []
    config: Optional[dict] = {}
    permissions: Optional[dict] = {}
-    model_max_budget: Optional[
-        dict
-    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[dict] = (
+        {}
+    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}

    model_config = ConfigDict(protected_namespaces=())
    model_rpm_limit: Optional[dict] = None
@@ -908,12 +908,12 @@ class NewCustomerRequest(BudgetNewRequest):
    alias: Optional[str] = None  # human-friendly alias
    blocked: bool = False  # allow/disallow requests for this end-user
    budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )

    @model_validator(mode="before")
    @classmethod
@@ -935,12 +935,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
    blocked: bool = False  # allow/disallow requests for this end-user
    max_budget: Optional[float] = None
    budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )


 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@@ -1076,9 +1076,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):

 class AddTeamCallback(LiteLLMPydanticObjectBase):
    callback_name: str
-    callback_type: Optional[
-        Literal["success", "failure", "success_and_failure"]
-    ] = "success_and_failure"
+    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
+        "success_and_failure"
+    )
    callback_vars: Dict[str, str]

    @model_validator(mode="before")
@@ -1335,9 +1335,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
    stored_in_db: Optional[bool]
    field_default_value: Any
    premium_field: bool = False
-    nested_fields: Optional[
-        List[FieldDetail]
-    ] = None  # For nested dictionary or Pydantic fields
+    nested_fields: Optional[List[FieldDetail]] = (
+        None  # For nested dictionary or Pydantic fields
+    )


 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@@ -1604,9 +1604,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
    budget_id: Optional[str] = None
    created_at: datetime
    updated_at: datetime
-    user: Optional[
-        Any
-    ] = None  # You might want to replace 'Any' with a more specific type if available
+    user: Optional[Any] = (
+        None  # You might want to replace 'Any' with a more specific type if available
+    )
    litellm_budget_table: Optional[LiteLLM_BudgetTable] = None

    model_config = ConfigDict(protected_namespaces=())
@@ -2352,9 +2352,9 @@ class TeamModelDeleteRequest(BaseModel):
 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
    organization_id: str
-    max_budget_in_organization: Optional[
-        float
-    ] = None  # Users max budget within the organization
+    max_budget_in_organization: Optional[float] = (
+        None  # Users max budget within the organization
+    )


 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@@ -2543,9 +2543,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
    Maps provider names to their budget configs.
    """

-    providers: Dict[
-        str, ProviderBudgetResponseObject
-    ] = {}  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[str, ProviderBudgetResponseObject] = (
+        {}
+    )  # Dictionary mapping provider names to their budget configurations


 class ProxyStateVariables(TypedDict):
@@ -2673,9 +2673,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
    enforce_rbac: bool = False
    roles_jwt_field: Optional[str] = None  # v2 on role mappings
    role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[
-        str
-    ] = None  # can be either user / team, inferred from the role mapping
+    object_id_jwt_field: Optional[str] = (
+        None  # can be either user / team, inferred from the role mapping
+    )
    scope_mappings: Optional[List[ScopeMapping]] = None
    enforce_scope_based_access: bool = False
    enforce_team_based_model_access: bool = False
@@ -2776,8 +2776,14 @@ class DailyUserSpendTransaction(TypedDict):
    model: str
    model_group: Optional[str]
    custom_llm_provider: Optional[str]
+
+    # token count metrics
    prompt_tokens: int
    completion_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+
+    # request level metrics
    spend: float
    api_requests: int
    successful_requests: int
--- a/litellm/proxy/db/db_spend_update_writer.py
+++ b/litellm/proxy/db/db_spend_update_writer.py
@@ -6,6 +6,7 @@ Module responsible for
 """

 import asyncio
+import json
 import os
 import time
 import traceback
@@ -22,6 +23,7 @@ from litellm.proxy._types import (
    DBSpendUpdateTransactions,
    Litellm_EntityType,
    LiteLLM_UserTable,
+    SpendLogsMetadata,
    SpendLogsPayload,
    SpendUpdateQueueItem,
 )
@@ -862,6 +864,13 @@ class DBSpendUpdateWriter:

        request_status = prisma_client.get_request_status(payload)
        verbose_proxy_logger.info(f"Logged request status: {request_status}")
+        _metadata: SpendLogsMetadata = json.loads(payload["metadata"])
+        usage_obj = _metadata.get("usage_object", {}) or {}
+        cache_read_input_tokens = usage_obj.get("cache_read_input_tokens", 0) or 0
+        cache_creation_input_tokens = (
+            usage_obj.get("cache_creation_input_tokens", 0) or 0
+        )
+
        if isinstance(payload["startTime"], datetime):
            start_time = payload["startTime"].isoformat()
            date = start_time.split("T")[0]
@@ -887,6 +896,8 @@ class DBSpendUpdateWriter:
                api_requests=1,
                successful_requests=1 if request_status == "success" else 0,
                failed_requests=1 if request_status != "success" else 0,
+                cache_read_input_tokens=cache_read_input_tokens,
+                cache_creation_input_tokens=cache_creation_input_tokens,
            )

            await self.daily_spend_update_queue.add_update(
--- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
+++ b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py
@@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):

    def __init__(self):
        super().__init__()
-        self.update_queue: asyncio.Queue[
-            Dict[str, DailyUserSpendTransaction]
-        ] = asyncio.Queue()
+        self.update_queue: asyncio.Queue[Dict[str, DailyUserSpendTransaction]] = (
+            asyncio.Queue()
+        )

    async def add_update(self, update: Dict[str, DailyUserSpendTransaction]):
        """Enqueue an update."""
@@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
        Combine all updates in the queue into a single update.
        This is used to reduce the size of the in-memory queue.
        """
-        updates: List[
-            Dict[str, DailyUserSpendTransaction]
-        ] = await self.flush_all_updates_from_in_memory_queue()
+        updates: List[Dict[str, DailyUserSpendTransaction]] = (
+            await self.flush_all_updates_from_in_memory_queue()
+        )
        aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
            updates
        )
@@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):

    @staticmethod
    def get_aggregated_daily_spend_update_transactions(
-        updates: List[Dict[str, DailyUserSpendTransaction]]
+        updates: List[Dict[str, DailyUserSpendTransaction]],
    ) -> Dict[str, DailyUserSpendTransaction]:
        """Aggregate updates by daily_transaction_key."""
        aggregated_daily_spend_update_transactions: Dict[
@@ -113,6 +113,12 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
                    daily_transaction["completion_tokens"] += payload[
                        "completion_tokens"
                    ]
+                    daily_transaction["cache_read_input_tokens"] += payload[
+                        "cache_read_input_tokens"
+                    ]
+                    daily_transaction["cache_creation_input_tokens"] += payload[
+                        "cache_creation_input_tokens"
+                    ]
                    daily_transaction["api_requests"] += payload["api_requests"]
                    daily_transaction["successful_requests"] += payload[
                        "successful_requests"
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
  custom_llm_provider String?  
  prompt_tokens       Int      @default(0)
  completion_tokens   Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
  spend               Float    @default(0.0)
  api_requests        Int      @default(0)
  successful_requests Int      @default(0)
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1713,6 +1713,8 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
    mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
    applied_guardrails: Optional[List[str]]
    usage_object: Optional[dict]
+    cache_read_input_tokens: Optional[int]
+    cache_creation_input_tokens: Optional[int]


 class StandardLoggingAdditionalHeaders(TypedDict, total=False):
--- a/schema.prisma
+++ b/schema.prisma
@@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
  custom_llm_provider String?  
  prompt_tokens       Int      @default(0)
  completion_tokens   Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
  spend               Float    @default(0.0)
  api_requests        Int      @default(0)
  successful_requests Int      @default(0)