stash changes

This commit is contained in:
Ishaan Jaff 2025-04-15 13:27:35 -07:00
parent 14bcc9a6c9
commit 09e13ab75b
7 changed files with 71 additions and 40 deletions

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String? custom_llm_provider String?
prompt_tokens Int @default(0) prompt_tokens Int @default(0)
completion_tokens Int @default(0) completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0) spend Float @default(0.0)
api_requests Int @default(0) api_requests Int @default(0)
successful_requests Int @default(0) successful_requests Int @default(0)

View file

@ -650,9 +650,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
allowed_cache_controls: Optional[list] = [] allowed_cache_controls: Optional[list] = []
config: Optional[dict] = {} config: Optional[dict] = {}
permissions: Optional[dict] = {} permissions: Optional[dict] = {}
model_max_budget: Optional[ model_max_budget: Optional[dict] = (
dict {}
] = {} # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {} ) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_config = ConfigDict(protected_namespaces=()) model_config = ConfigDict(protected_namespaces=())
model_rpm_limit: Optional[dict] = None model_rpm_limit: Optional[dict] = None
@ -908,12 +908,12 @@ class NewCustomerRequest(BudgetNewRequest):
alias: Optional[str] = None # human-friendly alias alias: Optional[str] = None # human-friendly alias
blocked: bool = False # allow/disallow requests for this end-user blocked: bool = False # allow/disallow requests for this end-user
budget_id: Optional[str] = None # give either a budget_id or max_budget budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[ allowed_model_region: Optional[AllowedModelRegion] = (
AllowedModelRegion None # require all user requests to use models in this specific region
] = None # require all user requests to use models in this specific region )
default_model: Optional[ default_model: Optional[str] = (
str None # if no equivalent model in allowed region - default all requests to this model
] = None # if no equivalent model in allowed region - default all requests to this model )
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
@ -935,12 +935,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
blocked: bool = False # allow/disallow requests for this end-user blocked: bool = False # allow/disallow requests for this end-user
max_budget: Optional[float] = None max_budget: Optional[float] = None
budget_id: Optional[str] = None # give either a budget_id or max_budget budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[ allowed_model_region: Optional[AllowedModelRegion] = (
AllowedModelRegion None # require all user requests to use models in this specific region
] = None # require all user requests to use models in this specific region )
default_model: Optional[ default_model: Optional[str] = (
str None # if no equivalent model in allowed region - default all requests to this model
] = None # if no equivalent model in allowed region - default all requests to this model )
class DeleteCustomerRequest(LiteLLMPydanticObjectBase): class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@ -1076,9 +1076,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
class AddTeamCallback(LiteLLMPydanticObjectBase): class AddTeamCallback(LiteLLMPydanticObjectBase):
callback_name: str callback_name: str
callback_type: Optional[ callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
Literal["success", "failure", "success_and_failure"] "success_and_failure"
] = "success_and_failure" )
callback_vars: Dict[str, str] callback_vars: Dict[str, str]
@model_validator(mode="before") @model_validator(mode="before")
@ -1335,9 +1335,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
stored_in_db: Optional[bool] stored_in_db: Optional[bool]
field_default_value: Any field_default_value: Any
premium_field: bool = False premium_field: bool = False
nested_fields: Optional[ nested_fields: Optional[List[FieldDetail]] = (
List[FieldDetail] None # For nested dictionary or Pydantic fields
] = None # For nested dictionary or Pydantic fields )
class ConfigGeneralSettings(LiteLLMPydanticObjectBase): class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@ -1604,9 +1604,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
budget_id: Optional[str] = None budget_id: Optional[str] = None
created_at: datetime created_at: datetime
updated_at: datetime updated_at: datetime
user: Optional[ user: Optional[Any] = (
Any None # You might want to replace 'Any' with a more specific type if available
] = None # You might want to replace 'Any' with a more specific type if available )
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
model_config = ConfigDict(protected_namespaces=()) model_config = ConfigDict(protected_namespaces=())
@ -2352,9 +2352,9 @@ class TeamModelDeleteRequest(BaseModel):
# Organization Member Requests # Organization Member Requests
class OrganizationMemberAddRequest(OrgMemberAddRequest): class OrganizationMemberAddRequest(OrgMemberAddRequest):
organization_id: str organization_id: str
max_budget_in_organization: Optional[ max_budget_in_organization: Optional[float] = (
float None # Users max budget within the organization
] = None # Users max budget within the organization )
class OrganizationMemberDeleteRequest(MemberDeleteRequest): class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@ -2543,9 +2543,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
Maps provider names to their budget configs. Maps provider names to their budget configs.
""" """
providers: Dict[ providers: Dict[str, ProviderBudgetResponseObject] = (
str, ProviderBudgetResponseObject {}
] = {} # Dictionary mapping provider names to their budget configurations ) # Dictionary mapping provider names to their budget configurations
class ProxyStateVariables(TypedDict): class ProxyStateVariables(TypedDict):
@ -2673,9 +2673,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
enforce_rbac: bool = False enforce_rbac: bool = False
roles_jwt_field: Optional[str] = None # v2 on role mappings roles_jwt_field: Optional[str] = None # v2 on role mappings
role_mappings: Optional[List[RoleMapping]] = None role_mappings: Optional[List[RoleMapping]] = None
object_id_jwt_field: Optional[ object_id_jwt_field: Optional[str] = (
str None # can be either user / team, inferred from the role mapping
] = None # can be either user / team, inferred from the role mapping )
scope_mappings: Optional[List[ScopeMapping]] = None scope_mappings: Optional[List[ScopeMapping]] = None
enforce_scope_based_access: bool = False enforce_scope_based_access: bool = False
enforce_team_based_model_access: bool = False enforce_team_based_model_access: bool = False
@ -2776,8 +2776,14 @@ class DailyUserSpendTransaction(TypedDict):
model: str model: str
model_group: Optional[str] model_group: Optional[str]
custom_llm_provider: Optional[str] custom_llm_provider: Optional[str]
# token count metrics
prompt_tokens: int prompt_tokens: int
completion_tokens: int completion_tokens: int
cache_read_input_tokens: int
cache_creation_input_tokens: int
# request level metrics
spend: float spend: float
api_requests: int api_requests: int
successful_requests: int successful_requests: int

View file

@ -6,6 +6,7 @@ Module responsible for
""" """
import asyncio import asyncio
import json
import os import os
import time import time
import traceback import traceback
@ -22,6 +23,7 @@ from litellm.proxy._types import (
DBSpendUpdateTransactions, DBSpendUpdateTransactions,
Litellm_EntityType, Litellm_EntityType,
LiteLLM_UserTable, LiteLLM_UserTable,
SpendLogsMetadata,
SpendLogsPayload, SpendLogsPayload,
SpendUpdateQueueItem, SpendUpdateQueueItem,
) )
@ -862,6 +864,13 @@ class DBSpendUpdateWriter:
request_status = prisma_client.get_request_status(payload) request_status = prisma_client.get_request_status(payload)
verbose_proxy_logger.info(f"Logged request status: {request_status}") verbose_proxy_logger.info(f"Logged request status: {request_status}")
_metadata: SpendLogsMetadata = json.loads(payload["metadata"])
usage_obj = _metadata.get("usage_object", {}) or {}
cache_read_input_tokens = usage_obj.get("cache_read_input_tokens", 0) or 0
cache_creation_input_tokens = (
usage_obj.get("cache_creation_input_tokens", 0) or 0
)
if isinstance(payload["startTime"], datetime): if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat() start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0] date = start_time.split("T")[0]
@ -887,6 +896,8 @@ class DBSpendUpdateWriter:
api_requests=1, api_requests=1,
successful_requests=1 if request_status == "success" else 0, successful_requests=1 if request_status == "success" else 0,
failed_requests=1 if request_status != "success" else 0, failed_requests=1 if request_status != "success" else 0,
cache_read_input_tokens=cache_read_input_tokens,
cache_creation_input_tokens=cache_creation_input_tokens,
) )
await self.daily_spend_update_queue.add_update( await self.daily_spend_update_queue.add_update(

View file

@ -53,9 +53,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.update_queue: asyncio.Queue[ self.update_queue: asyncio.Queue[Dict[str, DailyUserSpendTransaction]] = (
Dict[str, DailyUserSpendTransaction] asyncio.Queue()
] = asyncio.Queue() )
async def add_update(self, update: Dict[str, DailyUserSpendTransaction]): async def add_update(self, update: Dict[str, DailyUserSpendTransaction]):
"""Enqueue an update.""" """Enqueue an update."""
@ -72,9 +72,9 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
Combine all updates in the queue into a single update. Combine all updates in the queue into a single update.
This is used to reduce the size of the in-memory queue. This is used to reduce the size of the in-memory queue.
""" """
updates: List[ updates: List[Dict[str, DailyUserSpendTransaction]] = (
Dict[str, DailyUserSpendTransaction] await self.flush_all_updates_from_in_memory_queue()
] = await self.flush_all_updates_from_in_memory_queue() )
aggregated_updates = self.get_aggregated_daily_spend_update_transactions( aggregated_updates = self.get_aggregated_daily_spend_update_transactions(
updates updates
) )
@ -98,7 +98,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
@staticmethod @staticmethod
def get_aggregated_daily_spend_update_transactions( def get_aggregated_daily_spend_update_transactions(
updates: List[Dict[str, DailyUserSpendTransaction]] updates: List[Dict[str, DailyUserSpendTransaction]],
) -> Dict[str, DailyUserSpendTransaction]: ) -> Dict[str, DailyUserSpendTransaction]:
"""Aggregate updates by daily_transaction_key.""" """Aggregate updates by daily_transaction_key."""
aggregated_daily_spend_update_transactions: Dict[ aggregated_daily_spend_update_transactions: Dict[
@ -113,6 +113,12 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
daily_transaction["completion_tokens"] += payload[ daily_transaction["completion_tokens"] += payload[
"completion_tokens" "completion_tokens"
] ]
daily_transaction["cache_read_input_tokens"] += payload[
"cache_read_input_tokens"
]
daily_transaction["cache_creation_input_tokens"] += payload[
"cache_creation_input_tokens"
]
daily_transaction["api_requests"] += payload["api_requests"] daily_transaction["api_requests"] += payload["api_requests"]
daily_transaction["successful_requests"] += payload[ daily_transaction["successful_requests"] += payload[
"successful_requests" "successful_requests"

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String? custom_llm_provider String?
prompt_tokens Int @default(0) prompt_tokens Int @default(0)
completion_tokens Int @default(0) completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0) spend Float @default(0.0)
api_requests Int @default(0) api_requests Int @default(0)
successful_requests Int @default(0) successful_requests Int @default(0)

View file

@ -1713,6 +1713,8 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
applied_guardrails: Optional[List[str]] applied_guardrails: Optional[List[str]]
usage_object: Optional[dict] usage_object: Optional[dict]
cache_read_input_tokens: Optional[int]
cache_creation_input_tokens: Optional[int]
class StandardLoggingAdditionalHeaders(TypedDict, total=False): class StandardLoggingAdditionalHeaders(TypedDict, total=False):

View file

@ -326,6 +326,8 @@ model LiteLLM_DailyUserSpend {
custom_llm_provider String? custom_llm_provider String?
prompt_tokens Int @default(0) prompt_tokens Int @default(0)
completion_tokens Int @default(0) completion_tokens Int @default(0)
cache_read_input_tokens Int @default(0)
cache_creation_input_tokens Int @default(0)
spend Float @default(0.0) spend Float @default(0.0)
api_requests Int @default(0) api_requests Int @default(0)
successful_requests Int @default(0) successful_requests Int @default(0)