Merge pull request #2978 from BerriAI/litellm_org_spend_tracking

fix(proxy_server.py): support tracking org spend
2024-04-11 23:19:33 -07:00 · 2024-04-11 23:19:33 -07:00 · a1cb9a51b9
commit a1cb9a51b9
parent c480b5a008 f4c7f4f901
6 changed files with 136 additions and 1 deletion
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -46,4 +46,5 @@ general_settings:
  litellm_jwtauth:
    admin_jwt_scope: "litellm_proxy_admin"
    public_key_ttl: os.environ/LITELLM_PUBLIC_KEY_TTL
-    user_id_jwt_field: "sub"
+    user_id_jwt_field: "sub"
    org_id_jwt_field: "azp"
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -140,6 +140,7 @@ class LiteLLM_JWTAuth(LiteLLMBase):
    team_allowed_routes: List[
        Literal["openai_routes", "info_routes", "management_routes"]
    ] = ["openai_routes", "info_routes"]
    org_id_jwt_field: Optional[str] = None
    user_id_jwt_field: Optional[str] = None
    end_user_id_jwt_field: Optional[str] = None
    public_key_ttl: float = 600
@ -514,6 +515,7 @@ class LiteLLM_BudgetTable(LiteLLMBase):
 class NewOrganizationRequest(LiteLLM_BudgetTable):
    organization_id: Optional[str] = None
    organization_alias: str
    models: List = []
    budget_id: Optional[str] = None
@ -522,6 +524,7 @@ class NewOrganizationRequest(LiteLLM_BudgetTable):
 class LiteLLM_OrganizationTable(LiteLLMBase):
    """Represents user-controllable params for a LiteLLM_OrganizationTable record"""
    organization_id: Optional[str] = None
    organization_alias: Optional[str] = None
    budget_id: str
    metadata: Optional[dict] = None
@ -706,6 +709,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
    soft_budget_cooldown: bool = False
    litellm_budget_table: Optional[dict] = None
    org_id: Optional[str] = None  # org id for a given key
    # hidden params used for parallel request limiting, not required to create a token
    user_id_rate_limits: Optional[dict] = None
    team_id_rate_limits: Optional[dict] = None
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@ -14,6 +14,7 @@ from litellm.proxy._types import (
    LiteLLM_JWTAuth,
    LiteLLM_TeamTable,
    LiteLLMRoutes,
    LiteLLM_OrganizationTable,
 )
 from typing import Optional, Literal, Union
 from litellm.proxy.utils import PrismaClient
@ -287,3 +288,41 @@ async def get_team_object(
        raise Exception(
            f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."
        )
 async def get_org_object(
    org_id: str,
    prisma_client: Optional[PrismaClient],
    user_api_key_cache: DualCache,
 ):
    """
    Resolve an organization by id, consulting the cache before the DB.

    - Check if org id is cached under the key `org_id:<org_id>`
    - if not cached, check the proxy Org Table
    - if found, return the cached object / the DB row
    - if not, then raise an error

    Args:
        org_id: organization id to look up.
        prisma_client: DB client; must not be None.
        user_api_key_cache: cache queried via `async_get_cache`.

    Returns:
        The cached value when it is a dict or a LiteLLM_OrganizationTable,
        otherwise the row returned by `litellm_organizationtable.find_unique`
        (returned as-is, without conversion).

    Raises:
        Exception: if no DB is connected, if the DB lookup fails, or if no
            row exists for `org_id`.
    """
    if prisma_client is None:
        raise Exception(
            "No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
        )

    # check if in cache
    # The lookup has to be awaited: an un-awaited call yields a coroutine
    # object, which is never None and never matches the isinstance checks
    # below, so the cache would be silently bypassed on every request.
    # NOTE(review): assumes `DualCache.async_get_cache` is a coroutine
    # function, as its name indicates - confirm against DualCache.
    cached_org_obj = await user_api_key_cache.async_get_cache(
        key="org_id:{}".format(org_id)
    )
    if cached_org_obj is not None:
        if isinstance(cached_org_obj, dict):
            return cached_org_obj
        elif isinstance(cached_org_obj, LiteLLM_OrganizationTable):
            return cached_org_obj
        # any other cached type is ignored and we fall through to the DB

    # else, check db
    not_found_msg = f"Organization doesn't exist in db. Organization={org_id}. Create organization via `/organization/new` call."
    try:
        response = await prisma_client.db.litellm_organizationtable.find_unique(
            where={"organization_id": org_id}
        )
    except Exception as e:
        # Same message/type as before; keep the underlying DB error as the cause.
        raise Exception(not_found_msg) from e
    if response is None:
        raise Exception(not_found_msg)
    return response
--- a/litellm/proxy/auth/handle_jwt.py
+++ b/litellm/proxy/auth/handle_jwt.py
@ -84,6 +84,16 @@ class JWTHandler:
            user_id = default_value
        return user_id
    def get_org_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
        """
        Read the organization id from a decoded JWT.

        The claim name comes from `self.litellm_jwtauth.org_id_jwt_field`.

        - claim name not configured (None) -> returns None
        - claim present in `token` -> returns its value
        - claim missing from `token` -> returns `default_value`
        """
        claim_name = self.litellm_jwtauth.org_id_jwt_field
        if claim_name is None:
            # no org claim configured: nothing to look up
            return None
        try:
            return token[claim_name]
        except KeyError:
            return default_value
    def get_scopes(self, token: dict) -> list:
        try:
            if isinstance(token["scope"], str):
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -116,6 +116,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
 from litellm.proxy.auth.auth_checks import (
    common_checks,
    get_end_user_object,
    get_org_object,
    get_team_object,
    get_user_object,
    allowed_routes_check,
@ -422,6 +423,14 @@ async def user_api_key_auth(
                    user_api_key_cache=user_api_key_cache,
                )
                # [OPTIONAL] track spend for an org id - `LiteLLM_OrganizationTable`
                org_id = jwt_handler.get_org_id(token=valid_token, default_value=None)
                if org_id is not None:
                    _ = await get_org_object(
                        org_id=org_id,
                        prisma_client=prisma_client,
                        user_api_key_cache=user_api_key_cache,
                    )
                # [OPTIONAL] track spend against an internal employee - `LiteLLM_UserTable`
                user_object = None
                user_id = jwt_handler.get_user_id(token=valid_token, default_value=None)
@ -515,6 +524,7 @@ async def user_api_key_auth(
                    team_models=team_object.models,
                    user_role="app_owner",
                    user_id=user_id,
                    org_id=org_id,
                )
        #### ELSE ####
        if master_key is None:
@ -1233,6 +1243,7 @@ async def _PROXY_track_cost_callback(
        end_user_id = proxy_server_request.get("body", {}).get("user", None)
        user_id = kwargs["litellm_params"]["metadata"].get("user_api_key_user_id", None)
        team_id = kwargs["litellm_params"]["metadata"].get("user_api_key_team_id", None)
        org_id = kwargs["litellm_params"]["metadata"].get("user_api_key_org_id", None)
        if kwargs.get("response_cost", None) is not None:
            response_cost = kwargs["response_cost"]
            user_api_key = kwargs["litellm_params"]["metadata"].get(
@ -1260,6 +1271,7 @@ async def _PROXY_track_cost_callback(
                    completion_response=completion_response,
                    start_time=start_time,
                    end_time=end_time,
                    org_id=org_id,
                )
                await update_cache(
@ -1321,6 +1333,7 @@ async def update_database(
    completion_response=None,
    start_time=None,
    end_time=None,
    org_id=None,
 ):
    try:
        global prisma_client
@ -1551,9 +1564,34 @@ async def update_database(
                )
                raise e
        ### UPDATE ORG SPEND ###
        async def _update_org_db():
            """
            Add this request's `response_cost` to the pending spend for `org_id`.

            Only accumulates into `prisma_client.org_list_transactons`; no DB
            call is made here. Does nothing when `org_id` is None or no
            prisma client is set. Any exception is logged and re-raised.
            """
            try:
                verbose_proxy_logger.debug(
                    "adding spend to org db. Response cost: {}. org_id: {}.".format(
                        response_cost, org_id
                    )
                )
                if org_id is None:
                    verbose_proxy_logger.debug(
                        "track_cost_callback: org_id is None. Not tracking spend for org"
                    )
                    return
                if prisma_client is None:
                    return
                # running total per org id; a missing entry starts at 0
                pending_spend = prisma_client.org_list_transactons
                pending_spend[org_id] = response_cost + pending_spend.get(org_id, 0)
            except Exception as e:
                verbose_proxy_logger.info(
                    f"Update Org DB failed to execute - {str(e)}\n{traceback.format_exc()}"
                )
                raise e
        asyncio.create_task(_update_user_db())
        asyncio.create_task(_update_key_db())
        asyncio.create_task(_update_team_db())
        asyncio.create_task(_update_org_db())
        # asyncio.create_task(_insert_spend_log_to_db())
        if disable_spend_logs == False:
            await _insert_spend_log_to_db()
@ -3432,6 +3470,7 @@ async def chat_completion(
            user_api_key_dict, "key_alias", None
        )
        data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
        data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id
        data["metadata"]["user_api_key_team_id"] = getattr(
            user_api_key_dict, "team_id", None
        )
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -567,6 +567,7 @@ class PrismaClient:
    end_user_list_transactons: dict = {}
    key_list_transactons: dict = {}
    team_list_transactons: dict = {}
    org_list_transactons: dict = {}
    spend_log_transactions: List = []
    def __init__(self, database_url: str, proxy_logging_obj: ProxyLogging):
@ -2150,6 +2151,46 @@ async def update_spend(
                )
                raise e
    ### UPDATE ORG TABLE ###
    if len(prisma_client.org_list_transactons.keys()) > 0:
        for i in range(n_retry_times + 1):
            try:
                async with prisma_client.db.tx(
                    timeout=timedelta(seconds=60)
                ) as transaction:
                    async with transaction.batch_() as batcher:
                        for (
                            org_id,
                            response_cost,
                        ) in prisma_client.org_list_transactons.items():
                            batcher.litellm_organizationtable.update_many(  # 'update_many' prevents error from being raised if no row exists
                                where={"organization_id": org_id},
                                data={"spend": {"increment": response_cost}},
                            )
                prisma_client.org_list_transactons = (
                    {}
                )  # Clear the remaining transactions after processing all batches in the loop.
                break
            except httpx.ReadTimeout:
                if i >= n_retry_times:  # If we've reached the maximum number of retries
                    raise  # Re-raise the last exception
                # Optionally, sleep for a bit before retrying
                await asyncio.sleep(2**i)  # Exponential backoff
            except Exception as e:
                import traceback
                error_msg = (
                    f"LiteLLM Prisma Client Exception - update org spend: {str(e)}"
                )
                print_verbose(error_msg)
                error_traceback = error_msg + "\n" + traceback.format_exc()
                asyncio.create_task(
                    proxy_logging_obj.failure_handler(
                        original_exception=e, traceback_str=error_traceback
                    )
                )
                raise e
    ### UPDATE SPEND LOGS ###
    verbose_proxy_logger.debug(
        "Spend Logs transactions: {}".format(len(prisma_client.spend_log_transactions))