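stash changes: track cache_read_input_tokens and cache_creation_input_tokens in daily user spend (plus formatting-only reflows of type annotations)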

This commit is contained in:
Ishaan Jaff 2025-04-15 13:27:35 -07:00
parent 14bcc9a6c9
commit 09e13ab75b
7 changed files with 71 additions and 40 deletions

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String?
prompt_tokens Int @default(0)
completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)

View file

@ -650,9 +650,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
allowed_cache_controls: Optional[list] = []
config: Optional[dict] = {}
permissions: Optional[dict] = {}
model_max_budget: Optional[
dict
] = {} # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_max_budget: Optional[dict] = (
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_config = ConfigDict(protected_namespaces=())
model_rpm_limit: Optional[dict] = None
@ -908,12 +908,12 @@ class NewCustomerRequest(BudgetNewRequest):
alias: Optional[str] = None # human-friendly alias
blocked: bool = False # allow/disallow requests for this end-user
budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[
AllowedModelRegion
] = None # require all user requests to use models in this specific region
default_model: Optional[
str
] = None # if no equivalent model in allowed region - default all requests to this model
allowed_model_region: Optional[AllowedModelRegion] = (
None # require all user requests to use models in this specific region
)
default_model: Optional[str] = (
None # if no equivalent model in allowed region - default all requests to this model
)
@model_validator(mode="before")
@classmethod
@ -935,12 +935,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
blocked: bool = False # allow/disallow requests for this end-user
max_budget: Optional[float] = None
budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[
AllowedModelRegion
] = None # require all user requests to use models in this specific region
default_model: Optional[
str
] = None # if no equivalent model in allowed region - default all requests to this model
allowed_model_region: Optional[AllowedModelRegion] = (
None # require all user requests to use models in this specific region
)
default_model: Optional[str] = (
None # if no equivalent model in allowed region - default all requests to this model
)
class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@ -1076,9 +1076,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
class AddTeamCallback(LiteLLMPydanticObjectBase):
callback_name: str
callback_type: Optional[
Literal["success", "failure", "success_and_failure"]
] = "success_and_failure"
callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
"success_and_failure"
)
callback_vars: Dict[str, str]
@model_validator(mode="before")
@ -1335,9 +1335,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
stored_in_db: Optional[bool]
field_default_value: Any
premium_field: bool = False
nested_fields: Optional[
List[FieldDetail]
] = None # For nested dictionary or Pydantic fields
nested_fields: Optional[List[FieldDetail]] = (
None # For nested dictionary or Pydantic fields
)
class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@ -1604,9 +1604,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
budget_id: Optional[str] = None
created_at: datetime
updated_at: datetime
user: Optional[
Any
] = None # You might want to replace 'Any' with a more specific type if available
user: Optional[Any] = (
None # You might want to replace 'Any' with a more specific type if available
)
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
model_config = ConfigDict(protected_namespaces=())
@ -2352,9 +2352,9 @@ class TeamModelDeleteRequest(BaseModel):
# Organization Member Requests
class OrganizationMemberAddRequest(OrgMemberAddRequest):
organization_id: str
max_budget_in_organization: Optional[
float
] = None # Users max budget within the organization
max_budget_in_organization: Optional[float] = (
None # Users max budget within the organization
)
class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@ -2543,9 +2543,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
Maps provider names to their budget configs.
"""
providers: Dict[
str, ProviderBudgetResponseObject
] = {} # Dictionary mapping provider names to their budget configurations
providers: Dict[str, ProviderBudgetResponseObject] = (
{}
) # Dictionary mapping provider names to their budget configurations
class ProxyStateVariables(TypedDict):
@ -2673,9 +2673,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
enforce_rbac: bool = False
roles_jwt_field: Optional[str] = None # v2 on role mappings
role_mappings: Optional[List[RoleMapping]] = None
object_id_jwt_field: Optional[
str
] = None # can be either user / team, inferred from the role mapping
object_id_jwt_field: Optional[str] = (
None # can be either user / team, inferred from the role mapping
)
scope_mappings: Optional[List[ScopeMapping]] = None
enforce_scope_based_access: bool = False
enforce_team_based_model_access: bool = False
@ -2776,8 +2776,14 @@ class DailyUserSpendTransaction(TypedDict):
model: str
model_group: Optional[str]
custom_llm_provider: Optional[str]
# token count metrics
prompt_tokens: int
completion_tokens: int
cache_read_input_tokens: int
cache_creation_input_tokens: int
# request level metrics
spend: float
api_requests: int
successful_requests: int

View file

@ -6,6 +6,7 @@ Module responsible for
"""
import asyncio
import json
import os
import time
import traceback
@ -22,6 +23,7 @@ from litellm.proxy._types import (
DBSpendUpdateTransactions,
Litellm_EntityType,
LiteLLM_UserTable,
SpendLogsMetadata,
SpendLogsPayload,
SpendUpdateQueueItem,
)
@ -862,6 +864,13 @@ class DBSpendUpdateWriter:
request_status = prisma_client.get_request_status(payload)
verbose_proxy_logger.info(f"Logged request status: {request_status}")
_metadata: SpendLogsMetadata = json.loads(payload["metadata"])
usage_obj = _metadata.get("usage_object", {}) or {}
cache_read_input_tokens = usage_obj.get("cache_read_input_tokens", 0) or 0
cache_creation_input_tokens = (
usage_obj.get("cache_creation_input_tokens", 0) or 0
)
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
@ -887,6 +896,8 @@ class DBSpendUpdateWriter:
api_requests=1,
successful_requests=1 if request_status == "success" else 0,
failed_requests=1 if request_status != "success" else 0,
cache_read_input_tokens=cache_read_input_tokens,
cache_creation_input_tokens=cache_creation_input_tokens,
)
await self.daily_spend_update_queue.add_update(

View file

@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[
Dict[str, DailyUserSpendTransaction]
] = asyncio.Queue()
self.update_queue: asyncio.Queue[Dict[str, DailyUserSpendTransaction]] = (
asyncio.Queue()
)
async def add_update(self, update: Dict[str, DailyUserSpendTransaction]):
"""Enqueue an update."""
@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
Combine all updates in the queue into a single update.
This is used to reduce the size of the in-memory queue.
"""
updates: List[
Dict[str, DailyUserSpendTransaction]
] = await self.flush_all_updates_from_in_memory_queue()
updates: List[Dict[str, DailyUserSpendTransaction]] = (
await self.flush_all_updates_from_in_memory_queue()
)
aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
updates
)
@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
@staticmethod
def get_aggregated_daily_spend_update_transactions(
updates: List[Dict[str, DailyUserSpendTransaction]]
updates: List[Dict[str, DailyUserSpendTransaction]],
) -> Dict[str, DailyUserSpendTransaction]:
"""Aggregate updates by daily_transaction_key."""
aggregated_daily_spend_update_transactions: Dict[
@ -113,6 +113,12 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
daily_transaction["completion_tokens"] += payload[
"completion_tokens"
]
daily_transaction["cache_read_input_tokens"] += payload[
"cache_read_input_tokens"
]
daily_transaction["cache_creation_input_tokens"] += payload[
"cache_creation_input_tokens"
]
daily_transaction["api_requests"] += payload["api_requests"]
daily_transaction["successful_requests"] += payload[
"successful_requests"

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String?
prompt_tokens Int @default(0)
completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)

View file

@ -1713,6 +1713,8 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
applied_guardrails: Optional[List[str]]
usage_object: Optional[dict]
cache_read_input_tokens: Optional[int]
cache_creation_input_tokens: Optional[int]
class StandardLoggingAdditionalHeaders(TypedDict, total=False):

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String?
prompt_tokens Int @default(0)
completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)