diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 7a036af03..9709648e4 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -169,6 +169,7 @@ from litellm.scheduler import Scheduler, FlowItem, DefaultPriorities ## Import All Misc routes here ## from litellm.proxy.caching_routes import router as caching_router from litellm.proxy.team_endpoints import router as team_router +from litellm.proxy.spend_management_endpoints import router as spend_management_router try: from litellm._version import version @@ -4754,231 +4755,6 @@ async def run_thread( ) -###################################################################### - -# /v1/batches Endpoints - - -###################################################################### -@router.post( - "/v1/batches", - dependencies=[Depends(user_api_key_auth)], - tags=["batch"], -) -@router.post( - "/batches", - dependencies=[Depends(user_api_key_auth)], - tags=["batch"], -) -async def create_batch( - request: Request, - fastapi_response: Response, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Create large batches of API requests for asynchronous processing. 
- This is the equivalent of POST https://api.openai.com/v1/batch - Supports Identical Params as: https://platform.openai.com/docs/api-reference/batch - - Example Curl - ``` - curl http://localhost:4000/v1/batches \ - -H "Authorization: Bearer sk-1234" \ - -H "Content-Type: application/json" \ - -d '{ - "input_file_id": "file-abc123", - "endpoint": "/v1/chat/completions", - "completion_window": "24h" - }' - ``` - """ - global proxy_logging_obj - data: Dict = {} - try: - # Use orjson to parse JSON data, orjson speeds up requests significantly - form_data = await request.form() - data = {key: value for key, value in form_data.items() if key != "file"} - - # Include original request and headers in the data - data = await add_litellm_data_to_request( - data=data, - request=request, - general_settings=general_settings, - user_api_key_dict=user_api_key_dict, - version=version, - proxy_config=proxy_config, - ) - - _create_batch_data = CreateBatchRequest(**data) - - # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch - response = await litellm.acreate_batch( - custom_llm_provider="openai", **_create_batch_data - ) - - ### ALERTING ### - data["litellm_status"] = "success" # used for alerting - - ### RESPONSE HEADERS ### - hidden_params = getattr(response, "_hidden_params", {}) or {} - model_id = hidden_params.get("model_id", None) or "" - cache_key = hidden_params.get("cache_key", None) or "" - api_base = hidden_params.get("api_base", None) or "" - - fastapi_response.headers.update( - get_custom_headers( - user_api_key_dict=user_api_key_dict, - model_id=model_id, - cache_key=cache_key, - api_base=api_base, - version=version, - model_region=getattr(user_api_key_dict, "allowed_model_region", ""), - ) - ) - - return response - except Exception as e: - data["litellm_status"] = "fail" # used for alerting - await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e, 
request_data=data - ) - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_msg = f"{str(e)}" - raise ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) - - -@router.get( - "/v1/batches{batch_id}", - dependencies=[Depends(user_api_key_auth)], - tags=["batch"], -) -@router.get( - "/batches{batch_id}", - dependencies=[Depends(user_api_key_auth)], - tags=["batch"], -) -async def retrieve_batch( - request: Request, - fastapi_response: Response, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), - batch_id: str = Path( - title="Batch ID to retrieve", description="The ID of the batch to retrieve" - ), -): - """ - Retrieves a batch. 
- This is the equivalent of GET https://api.openai.com/v1/batches/{batch_id} - Supports Identical Params as: https://platform.openai.com/docs/api-reference/batch/retrieve - - Example Curl - ``` - curl http://localhost:4000/v1/batches/batch_abc123 \ - -H "Authorization: Bearer sk-1234" \ - -H "Content-Type: application/json" \ - - ``` - """ - global proxy_logging_obj - data: Dict = {} - try: - # Use orjson to parse JSON data, orjson speeds up requests significantly - form_data = await request.form() - data = {key: value for key, value in form_data.items() if key != "file"} - - # Include original request and headers in the data - data = await add_litellm_data_to_request( - data=data, - request=request, - general_settings=general_settings, - user_api_key_dict=user_api_key_dict, - version=version, - proxy_config=proxy_config, - ) - - _retrieve_batch_request = RetrieveBatchRequest( - batch_id=batch_id, - ) - - # for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch - response = await litellm.aretrieve_batch( - custom_llm_provider="openai", **_retrieve_batch_request - ) - - ### ALERTING ### - data["litellm_status"] = "success" # used for alerting - - ### RESPONSE HEADERS ### - hidden_params = getattr(response, "_hidden_params", {}) or {} - model_id = hidden_params.get("model_id", None) or "" - cache_key = hidden_params.get("cache_key", None) or "" - api_base = hidden_params.get("api_base", None) or "" - - fastapi_response.headers.update( - get_custom_headers( - user_api_key_dict=user_api_key_dict, - model_id=model_id, - cache_key=cache_key, - api_base=api_base, - version=version, - model_region=getattr(user_api_key_dict, "allowed_model_region", ""), - ) - ) - - return response - except Exception as e: - data["litellm_status"] = "fail" # used for alerting - await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data - ) - verbose_proxy_logger.error( 
- "litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_traceback = traceback.format_exc() - error_msg = f"{str(e)}" - raise ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) - - -###################################################################### - -# END OF /v1/batches Endpoints Implementation - -###################################################################### - - ###################################################################### # /v1/files Endpoints @@ -5922,1809 +5698,6 @@ async def info_key_fn( ) -#### SPEND MANAGEMENT ##### - - -@router.get( - "/spend/keys", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def spend_key_fn(): - """ - View all keys created, ordered by spend - - Example Request: - ``` - curl -X GET "http://0.0.0.0:8000/spend/keys" \ --H "Authorization: Bearer sk-1234" - ``` - """ - global prisma_client - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - key_info = await prisma_client.get_data(table_name="key", query_type="find_all") - return key_info - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/spend/users", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def spend_user_fn( - user_id: Optional[str] = fastapi.Query( - default=None, - description="Get User Table row for user_id", - ), -): - """ - View all users created, ordered by spend - - Example Request: - ``` - curl -X GET "http://0.0.0.0:8000/spend/users" \ --H "Authorization: Bearer sk-1234" - ``` - - View User Table row for user_id - ``` - curl -X GET "http://0.0.0.0:8000/spend/users?user_id=1234" \ --H "Authorization: Bearer sk-1234" - ``` - """ - global prisma_client - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - if user_id is not None: - user_info = await prisma_client.get_data( - table_name="user", query_type="find_unique", user_id=user_id - ) - return [user_info] - else: - user_info = await prisma_client.get_data( - table_name="user", query_type="find_all" - ) - - return user_info - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/spend/tags", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def view_spend_tags( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing key spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view key spend", - ), -): - """ - LiteLLM Enterprise - View Spend Per Request Tag - - Example Request: - ``` - curl -X GET "http://0.0.0.0:8000/spend/tags" \ --H "Authorization: Bearer sk-1234" - ``` - - Spend with Start Date and End Date - ``` - curl -X GET "http://0.0.0.0:8000/spend/tags?start_date=2022-01-01&end_date=2022-02-01" \ --H "Authorization: Bearer sk-1234" - ``` - """ - - from enterprise.utils import get_spend_by_tags - - global prisma_client - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - # run the following SQL query on prisma - """ - SELECT - jsonb_array_elements_text(request_tags) AS individual_request_tag, - COUNT(*) AS log_count, - SUM(spend) AS total_spend - FROM "LiteLLM_SpendLogs" - GROUP BY individual_request_tag; - """ - response = await get_spend_by_tags( - start_date=start_date, end_date=end_date, prisma_client=prisma_client - ) - - return response - except Exception as e: - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "detail", f"/spend/tags Error({str(e)})"), - type="internal_error", - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), - ) - elif isinstance(e, ProxyException): - raise e - raise ProxyException( - message="/spend/tags Error" + str(e), - type="internal_error", - param=getattr(e, "param", "None"), - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -@router.get( - "/global/activity", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, - include_in_schema=False, -) -async def get_global_activity( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), -): - """ - Get number of API Requests, total tokens through proxy - - { - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - api_requests: 10, - total_tokens: 2000 - }, - { - date: 'Jan 23', - api_requests: 10, - total_tokens: 12 - }, - ], - "sum_api_requests": 20, - "sum_total_tokens": 2012 - } - """ - from collections import defaultdict - - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and 
end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client, llm_router - try: - if prisma_client is None: - raise Exception( - f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - SELECT - date_trunc('day', "startTime") AS date, - COUNT(*) AS api_requests, - SUM(total_tokens) AS total_tokens - FROM "LiteLLM_SpendLogs" - WHERE "startTime" BETWEEN $1::date AND $2::date + interval '1 day' - GROUP BY date_trunc('day', "startTime") - """ - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj - ) - - if db_response is None: - return [] - - sum_api_requests = 0 - sum_total_tokens = 0 - daily_data = [] - for row in db_response: - # cast date to datetime - _date_obj = datetime.fromisoformat(row["date"]) - row["date"] = _date_obj.strftime("%b %d") - - daily_data.append(row) - sum_api_requests += row.get("api_requests", 0) - sum_total_tokens += row.get("total_tokens", 0) - - # sort daily_data by date - daily_data = sorted(daily_data, key=lambda x: x["date"]) - - data_to_return = { - "daily_data": daily_data, - "sum_api_requests": sum_api_requests, - "sum_total_tokens": sum_total_tokens, - } - - return data_to_return - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/activity/model", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, - include_in_schema=False, -) -async def get_global_activity_model( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), -): - """ - Get 
number of API Requests, total tokens through proxy - Grouped by MODEL - - [ - { - "model": "gpt-4", - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - api_requests: 10, - total_tokens: 2000 - }, - { - date: 'Jan 23', - api_requests: 10, - total_tokens: 12 - }, - ], - "sum_api_requests": 20, - "sum_total_tokens": 2012 - - }, - { - "model": "azure/gpt-4-turbo", - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - api_requests: 10, - total_tokens: 2000 - }, - { - date: 'Jan 23', - api_requests: 10, - total_tokens: 12 - }, - ], - "sum_api_requests": 20, - "sum_total_tokens": 2012 - - }, - ] - """ - from collections import defaultdict - - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client, llm_router, premium_user - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - SELECT - model_group, - date_trunc('day', "startTime") AS date, - COUNT(*) AS api_requests, - SUM(total_tokens) AS total_tokens - FROM "LiteLLM_SpendLogs" - WHERE "startTime" BETWEEN $1::date AND $2::date + interval '1 day' - GROUP BY model_group, date_trunc('day', "startTime") - """ - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj - ) - if db_response is None: - return [] - - model_ui_data: dict = ( - {} - ) # {"gpt-4": {"daily_data": [], "sum_api_requests": 0, "sum_total_tokens": 0}} - - for row in db_response: - _model = row["model_group"] - if _model not in model_ui_data: - model_ui_data[_model] = { - "daily_data": [], - "sum_api_requests": 0, - "sum_total_tokens": 0, - } - _date_obj = datetime.fromisoformat(row["date"]) - row["date"] = _date_obj.strftime("%b %d") - - model_ui_data[_model]["daily_data"].append(row) - model_ui_data[_model]["sum_api_requests"] += row.get("api_requests", 0) - model_ui_data[_model]["sum_total_tokens"] += row.get("total_tokens", 0) - - # sort mode ui data by sum_api_requests -> get top 10 models - model_ui_data = dict( - sorted( - model_ui_data.items(), - key=lambda x: x[1]["sum_api_requests"], - reverse=True, - )[:10] - ) - - response = [] - for model, data in model_ui_data.items(): - _sort_daily_data = sorted(data["daily_data"], key=lambda x: x["date"]) - - response.append( - { - "model": model, - "daily_data": _sort_daily_data, - "sum_api_requests": data["sum_api_requests"], - "sum_total_tokens": data["sum_total_tokens"], - } - ) - - return response - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/activity/exceptions/deployment", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": 
List[LiteLLM_SpendLogs]}, - }, - include_in_schema=False, -) -async def get_global_activity_exceptions_per_deployment( - model_group: str = fastapi.Query( - description="Filter by model group", - ), - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), -): - """ - Get number of 429 errors - Grouped by deployment - - [ - { - "deployment": "https://azure-us-east-1.openai.azure.com/", - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - num_rate_limit_exceptions: 10 - }, - { - date: 'Jan 23', - num_rate_limit_exceptions: 12 - }, - ], - "sum_num_rate_limit_exceptions": 20, - - }, - { - "deployment": "https://azure-us-east-1.openai.azure.com/", - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - num_rate_limit_exceptions: 10, - }, - { - date: 'Jan 23', - num_rate_limit_exceptions: 12 - }, - ], - "sum_num_rate_limit_exceptions": 20, - - }, - ] - """ - from collections import defaultdict - - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client, llm_router, premium_user - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - SELECT - api_base, - date_trunc('day', "startTime")::date AS date, - COUNT(*) AS num_rate_limit_exceptions - FROM - "LiteLLM_ErrorLogs" - WHERE - "startTime" >= $1::date - AND "startTime" < ($2::date + INTERVAL '1 day') - AND model_group = $3 - AND status_code = '429' - GROUP BY - api_base, - date_trunc('day', "startTime") - ORDER BY - date; - """ - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj, model_group - ) - if db_response is None: - return [] - - model_ui_data: dict = ( - {} - ) # {"gpt-4": {"daily_data": [], "sum_api_requests": 0, "sum_total_tokens": 0}} - - for row in db_response: - _model = row["api_base"] - if _model not in model_ui_data: - model_ui_data[_model] = { - "daily_data": [], - "sum_num_rate_limit_exceptions": 0, - } - _date_obj = datetime.fromisoformat(row["date"]) - row["date"] = _date_obj.strftime("%b %d") - - model_ui_data[_model]["daily_data"].append(row) - model_ui_data[_model]["sum_num_rate_limit_exceptions"] += row.get( - "num_rate_limit_exceptions", 0 - ) - - # sort mode ui data by sum_api_requests -> get top 10 models - model_ui_data = dict( - sorted( - model_ui_data.items(), - key=lambda x: x[1]["sum_num_rate_limit_exceptions"], - reverse=True, - )[:10] - ) - - response = [] - for model, data in model_ui_data.items(): - _sort_daily_data = sorted(data["daily_data"], key=lambda x: x["date"]) - - response.append( - { - "api_base": model, - "daily_data": _sort_daily_data, - "sum_num_rate_limit_exceptions": data[ - "sum_num_rate_limit_exceptions" - ], - } - ) - - return response - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/activity/exceptions", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: 
{"model": List[LiteLLM_SpendLogs]}, - }, - include_in_schema=False, -) -async def get_global_activity_exceptions( - model_group: str = fastapi.Query( - description="Filter by model group", - ), - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), -): - """ - Get number of API Requests, total tokens through proxy - - { - "daily_data": [ - const chartdata = [ - { - date: 'Jan 22', - num_rate_limit_exceptions: 10, - }, - { - date: 'Jan 23', - num_rate_limit_exceptions: 10, - }, - ], - "sum_api_exceptions": 20, - } - """ - from collections import defaultdict - - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client, llm_router - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - SELECT - date_trunc('day', "startTime")::date AS date, - COUNT(*) AS num_rate_limit_exceptions - FROM - "LiteLLM_ErrorLogs" - WHERE - "startTime" >= $1::date - AND "startTime" < ($2::date + INTERVAL '1 day') - AND model_group = $3 - AND status_code = '429' - GROUP BY - date_trunc('day', "startTime") - ORDER BY - date; - """ - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj, model_group - ) - - if db_response is None: - return [] - - sum_num_rate_limit_exceptions = 0 - daily_data = [] - for row in db_response: - # cast date to datetime - _date_obj = datetime.fromisoformat(row["date"]) - row["date"] = _date_obj.strftime("%b %d") - - daily_data.append(row) - sum_num_rate_limit_exceptions += row.get("num_rate_limit_exceptions", 0) - - # sort daily_data by date - daily_data = sorted(daily_data, key=lambda x: x["date"]) - - data_to_return = { - "daily_data": daily_data, - "sum_num_rate_limit_exceptions": sum_num_rate_limit_exceptions, - } - - return data_to_return - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/spend/provider", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def get_global_spend_provider( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), -): - """ - Get breakdown of spend per provider - [ - { - "provider": "Azure OpenAI", - "spend": 20 - }, - { - "provider": "OpenAI", - "spend": 10 - }, - { - "provider": "VertexAI", - "spend": 30 - } - ] - """ - from collections import 
defaultdict - - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client, llm_router - try: - if prisma_client is None: - raise Exception( - f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - - SELECT - model_id, - SUM(spend) AS spend - FROM "LiteLLM_SpendLogs" - WHERE "startTime" BETWEEN $1::date AND $2::date AND length(model_id) > 0 - GROUP BY model_id - """ - - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj - ) - if db_response is None: - return [] - - ################################### - # Convert model_id -> to Provider # - ################################### - - # we use the in memory router for this - ui_response = [] - provider_spend_mapping: defaultdict = defaultdict(int) - for row in db_response: - _model_id = row["model_id"] - _provider = "Unknown" - if llm_router is not None: - _deployment = llm_router.get_deployment(model_id=_model_id) - if _deployment is not None: - try: - _, _provider, _, _ = litellm.get_llm_provider( - model=_deployment.litellm_params.model, - custom_llm_provider=_deployment.litellm_params.custom_llm_provider, - api_base=_deployment.litellm_params.api_base, - litellm_params=_deployment.litellm_params, - ) - provider_spend_mapping[_provider] += row["spend"] - except: - pass - - for provider, spend in provider_spend_mapping.items(): - ui_response.append({"provider": provider, "spend": spend}) - - return ui_response - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/spend/report", - tags=["Budget & Spend Tracking"], - 
dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def get_global_spend_report( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view spend", - ), - group_by: Optional[Literal["team", "customer"]] = fastapi.Query( - default="team", - description="Group spend by internal team or customer", - ), -): - """ - Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model - [ - { - "group-by-day": "2024-05-10", - "teams": [ - { - "team_name": "team-1" - "spend": 10, - "keys": [ - "key": "1213", - "usage": { - "model-1": { - "cost": 12.50, - "input_tokens": 1000, - "output_tokens": 5000, - "requests": 100 - }, - "audio-modelname1": { - "cost": 25.50, - "seconds": 25, - "requests": 50 - }, - } - } - ] - ] - } - """ - if start_date is None or end_date is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": "Please provide start_date and end_date"}, - ) - - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - global prisma_client - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - if group_by == "team": - # first get data from spend logs -> SpendByModelApiKey - # then read data from "SpendByModelApiKey" to format the response obj - sql_query = """ - - WITH SpendByModelApiKey AS ( - SELECT - date_trunc('day', sl."startTime") AS group_by_day, - COALESCE(tt.team_alias, 'Unassigned Team') AS team_name, - sl.model, - sl.api_key, - SUM(sl.spend) AS model_api_spend, - SUM(sl.total_tokens) AS model_api_tokens - FROM - "LiteLLM_SpendLogs" sl - LEFT JOIN - "LiteLLM_TeamTable" tt - ON - sl.team_id = tt.team_id - WHERE - sl."startTime" BETWEEN $1::date AND $2::date - GROUP BY - date_trunc('day', sl."startTime"), - tt.team_alias, - sl.model, - sl.api_key - ) - SELECT - group_by_day, - jsonb_agg(jsonb_build_object( - 'team_name', team_name, - 'total_spend', total_spend, - 'metadata', metadata - )) AS teams - FROM ( - SELECT - group_by_day, - team_name, - SUM(model_api_spend) AS total_spend, - jsonb_agg(jsonb_build_object( - 'model', model, - 'api_key', api_key, - 'spend', model_api_spend, - 'total_tokens', model_api_tokens - )) AS metadata - FROM - SpendByModelApiKey - GROUP BY - group_by_day, - team_name - ) AS aggregated - GROUP BY - group_by_day - ORDER BY - group_by_day; - """ - - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj - ) - if db_response is None: - return [] - - return db_response - - elif group_by == "customer": - sql_query = """ - - WITH SpendByModelApiKey AS ( - SELECT - date_trunc('day', sl."startTime") AS group_by_day, - sl.end_user AS customer, - sl.model, - sl.api_key, - SUM(sl.spend) AS model_api_spend, - SUM(sl.total_tokens) AS model_api_tokens - FROM - "LiteLLM_SpendLogs" sl - WHERE - sl."startTime" BETWEEN $1::date AND $2::date - GROUP BY - date_trunc('day', sl."startTime"), - customer, - sl.model, - sl.api_key - ) - SELECT - group_by_day, - jsonb_agg(jsonb_build_object( - 
'customer', customer, - 'total_spend', total_spend, - 'metadata', metadata - )) AS customers - FROM - ( - SELECT - group_by_day, - customer, - SUM(model_api_spend) AS total_spend, - jsonb_agg(jsonb_build_object( - 'model', model, - 'api_key', api_key, - 'spend', model_api_spend, - 'total_tokens', model_api_tokens - )) AS metadata - FROM - SpendByModelApiKey - GROUP BY - group_by_day, - customer - ) AS aggregated - GROUP BY - group_by_day - ORDER BY - group_by_day; - """ - - db_response = await prisma_client.db.query_raw( - sql_query, start_date_obj, end_date_obj - ) - if db_response is None: - return [] - - return db_response - - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={"error": str(e)}, - ) - - -@router.get( - "/global/spend/all_tag_names", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def global_get_all_tag_names(): - try: - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - sql_query = """ - SELECT DISTINCT - jsonb_array_elements_text(request_tags) AS individual_request_tag - FROM "LiteLLM_SpendLogs"; - """ - - db_response = await prisma_client.db.query_raw(sql_query) - if db_response is None: - return [] - - _tag_names = [] - for row in db_response: - _tag_names.append(row.get("individual_request_tag")) - - return {"tag_names": _tag_names} - - except Exception as e: - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "detail", f"/spend/all_tag_names Error({str(e)})"), - type="internal_error", - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), - ) - elif isinstance(e, ProxyException): - raise e - raise ProxyException( - message="/spend/all_tag_names Error" + str(e), - type="internal_error", - param=getattr(e, "param", "None"), - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -@router.get( - "/global/spend/tags", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def global_view_spend_tags( - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing key spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view key spend", - ), - tags: Optional[str] = fastapi.Query( - default=None, - description="comman separated tags to filter on", - ), -): - """ - LiteLLM Enterprise - View Spend Per Request Tag. 
Used by LiteLLM UI - - Example Request: - ``` - curl -X GET "http://0.0.0.0:4000/spend/tags" \ --H "Authorization: Bearer sk-1234" - ``` - - Spend with Start Date and End Date - ``` - curl -X GET "http://0.0.0.0:4000/spend/tags?start_date=2022-01-01&end_date=2022-02-01" \ --H "Authorization: Bearer sk-1234" - ``` - """ - - from enterprise.utils import ui_get_spend_by_tags - - global prisma_client - try: - if prisma_client is None: - raise Exception( - f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - - if end_date is None or start_date is None: - raise ProxyException( - message="Please provide start_date and end_date", - type="bad_request", - param=None, - code=status.HTTP_400_BAD_REQUEST, - ) - response = await ui_get_spend_by_tags( - start_date=start_date, - end_date=end_date, - tags_str=tags, - prisma_client=prisma_client, - ) - - return response - except Exception as e: - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "detail", f"/spend/tags Error({str(e)})"), - type="internal_error", - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), - ) - elif isinstance(e, ProxyException): - raise e - raise ProxyException( - message="/spend/tags Error" + str(e), - type="internal_error", - param=getattr(e, "param", "None"), - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -async def _get_spend_report_for_time_range( - start_date: str, - end_date: str, -): - global prisma_client - if prisma_client is None: - verbose_proxy_logger.error( - f"Database not connected. 
Connect a database to your proxy for weekly, monthly spend reports" - ) - return None - - try: - sql_query = """ - SELECT - t.team_alias, - SUM(s.spend) AS total_spend - FROM - "LiteLLM_SpendLogs" s - LEFT JOIN - "LiteLLM_TeamTable" t ON s.team_id = t.team_id - WHERE - s."startTime"::DATE >= $1::date AND s."startTime"::DATE <= $2::date - GROUP BY - t.team_alias - ORDER BY - total_spend DESC; - """ - response = await prisma_client.db.query_raw(sql_query, start_date, end_date) - - # get spend per tag for today - sql_query = """ - SELECT - jsonb_array_elements_text(request_tags) AS individual_request_tag, - SUM(spend) AS total_spend - FROM "LiteLLM_SpendLogs" - WHERE "startTime"::DATE >= $1::date AND "startTime"::DATE <= $2::date - GROUP BY individual_request_tag - ORDER BY total_spend DESC; - """ - - spend_per_tag = await prisma_client.db.query_raw( - sql_query, start_date, end_date - ) - - return response, spend_per_tag - except Exception as e: - verbose_proxy_logger.error( - "Exception in _get_daily_spend_reports {}".format(str(e)) - ) # noqa - - -@router.post( - "/spend/calculate", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: { - "cost": { - "description": "The calculated cost", - "example": 0.0, - "type": "float", - } - } - }, -) -async def calculate_spend(request: Request): - """ - Accepts all the params of completion_cost. 
- - Calculate spend **before** making call: - - Note: If you see a spend of $0.0 you need to set custom_pricing for your model: https://docs.litellm.ai/docs/proxy/custom_pricing - - ``` - curl --location 'http://localhost:4000/spend/calculate' - --header 'Authorization: Bearer sk-1234' - --header 'Content-Type: application/json' - --data '{ - "model": "anthropic.claude-v2", - "messages": [{"role": "user", "content": "Hey, how'''s it going?"}] - }' - ``` - - Calculate spend **after** making call: - - ``` - curl --location 'http://localhost:4000/spend/calculate' - --header 'Authorization: Bearer sk-1234' - --header 'Content-Type: application/json' - --data '{ - "completion_response": { - "id": "chatcmpl-123", - "object": "chat.completion", - "created": 1677652288, - "model": "gpt-3.5-turbo-0125", - "system_fingerprint": "fp_44709d6fcb", - "choices": [{ - "index": 0, - "message": { - "role": "assistant", - "content": "Hello there, how may I assist you today?" - }, - "logprobs": null, - "finish_reason": "stop" - }] - "usage": { - "prompt_tokens": 9, - "completion_tokens": 12, - "total_tokens": 21 - } - } - }' - ``` - """ - from litellm import completion_cost - - data = await request.json() - if "completion_response" in data: - data["completion_response"] = litellm.ModelResponse( - **data["completion_response"] - ) - return {"cost": completion_cost(**data)} - - -@router.get( - "/spend/logs", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - responses={ - 200: {"model": List[LiteLLM_SpendLogs]}, - }, -) -async def view_spend_logs( - api_key: Optional[str] = fastapi.Query( - default=None, - description="Get spend logs based on api key", - ), - user_id: Optional[str] = fastapi.Query( - default=None, - description="Get spend logs based on user_id", - ), - request_id: Optional[str] = fastapi.Query( - default=None, - description="request_id to get spend logs for specific request_id. 
If none passed then pass spend logs for all requests", - ), - start_date: Optional[str] = fastapi.Query( - default=None, - description="Time from which to start viewing key spend", - ), - end_date: Optional[str] = fastapi.Query( - default=None, - description="Time till which to view key spend", - ), -): - """ - View all spend logs, if request_id is provided, only logs for that request_id will be returned - - Example Request for all logs - ``` - curl -X GET "http://0.0.0.0:8000/spend/logs" \ --H "Authorization: Bearer sk-1234" - ``` - - Example Request for specific request_id - ``` - curl -X GET "http://0.0.0.0:8000/spend/logs?request_id=chatcmpl-6dcb2540-d3d7-4e49-bb27-291f863f112e" \ --H "Authorization: Bearer sk-1234" - ``` - - Example Request for specific api_key - ``` - curl -X GET "http://0.0.0.0:8000/spend/logs?api_key=sk-Fn8Ej39NkBQmUagFEoUWPQ" \ --H "Authorization: Bearer sk-1234" - ``` - - Example Request for specific user_id - ``` - curl -X GET "http://0.0.0.0:8000/spend/logs?user_id=ishaan@berri.ai" \ --H "Authorization: Bearer sk-1234" - ``` - """ - global prisma_client - try: - verbose_proxy_logger.debug("inside view_spend_logs") - if prisma_client is None: - raise Exception( - f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" - ) - spend_logs = [] - if ( - start_date is not None - and isinstance(start_date, str) - and end_date is not None - and isinstance(end_date, str) - ): - # Convert the date strings to datetime objects - start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") - end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") - - filter_query = { - "startTime": { - "gte": start_date_obj, # Greater than or equal to Start Date - "lte": end_date_obj, # Less than or equal to End Date - } - } - - if api_key is not None and isinstance(api_key, str): - filter_query["api_key"] = api_key # type: ignore - elif request_id is not None and isinstance(request_id, str): - filter_query["request_id"] = request_id # type: ignore - elif user_id is not None and isinstance(user_id, str): - filter_query["user"] = user_id # type: ignore - - # SQL query - response = await prisma_client.db.litellm_spendlogs.group_by( - by=["api_key", "user", "model", "startTime"], - where=filter_query, # type: ignore - sum={ - "spend": True, - }, - ) - - if ( - isinstance(response, list) - and len(response) > 0 - and isinstance(response[0], dict) - ): - result: dict = {} - for record in response: - dt_object = datetime.strptime( - str(record["startTime"]), "%Y-%m-%dT%H:%M:%S.%fZ" - ) # type: ignore - date = dt_object.date() - if date not in result: - result[date] = {"users": {}, "models": {}} - api_key = record["api_key"] - user_id = record["user"] - model = record["model"] - result[date]["spend"] = ( - result[date].get("spend", 0) + record["_sum"]["spend"] - ) - result[date][api_key] = ( - result[date].get(api_key, 0) + record["_sum"]["spend"] - ) - result[date]["users"][user_id] = ( - result[date]["users"].get(user_id, 0) + record["_sum"]["spend"] - ) - result[date]["models"][model] = ( - result[date]["models"].get(model, 0) + record["_sum"]["spend"] - ) - return_list = [] - final_date = None - for k, v in 
sorted(result.items()): - return_list.append({**v, "startTime": k}) - final_date = k - - end_date_date = end_date_obj.date() - if final_date is not None and final_date < end_date_date: - current_date = final_date + timedelta(days=1) - while current_date <= end_date_date: - # Represent current_date as string because original response has it this way - return_list.append( - { - "startTime": current_date, - "spend": 0, - "users": {}, - "models": {}, - } - ) # If no data, will stay as zero - current_date += timedelta(days=1) # Move on to the next day - - return return_list - - return response - - elif api_key is not None and isinstance(api_key, str): - if api_key.startswith("sk-"): - hashed_token = prisma_client.hash_token(token=api_key) - else: - hashed_token = api_key - spend_log = await prisma_client.get_data( - table_name="spend", - query_type="find_all", - key_val={"key": "api_key", "value": hashed_token}, - ) - if isinstance(spend_log, list): - return spend_log - else: - return [spend_log] - elif request_id is not None: - spend_log = await prisma_client.get_data( - table_name="spend", - query_type="find_unique", - key_val={"key": "request_id", "value": request_id}, - ) - return [spend_log] - elif user_id is not None: - spend_log = await prisma_client.get_data( - table_name="spend", - query_type="find_all", - key_val={"key": "user", "value": user_id}, - ) - if isinstance(spend_log, list): - return spend_log - else: - return [spend_log] - else: - spend_logs = await prisma_client.get_data( - table_name="spend", query_type="find_all" - ) - - return spend_log - - return None - - except Exception as e: - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "detail", f"/spend/logs Error({str(e)})"), - type="internal_error", - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), - ) - elif isinstance(e, ProxyException): - raise e - raise ProxyException( - message="/spend/logs Error" + str(e), 
- type="internal_error", - param=getattr(e, "param", "None"), - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -@router.post( - "/global/spend/reset", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], -) -async def global_spend_reset(): - """ - ADMIN ONLY / MASTER KEY Only Endpoint - - Globally reset spend for All API Keys and Teams, maintain LiteLLM_SpendLogs - - 1. LiteLLM_SpendLogs will maintain the logs on spend, no data gets deleted from there - 2. LiteLLM_VerificationTokens spend will be set = 0 - 3. LiteLLM_TeamTable spend will be set = 0 - - """ - global prisma_client - if prisma_client is None: - raise ProxyException( - message="Prisma Client is not initialized", - type="internal_error", - param="None", - code=status.HTTP_401_UNAUTHORIZED, - ) - - await prisma_client.db.litellm_verificationtoken.update_many( - data={"spend": 0.0}, where={} - ) - await prisma_client.db.litellm_teamtable.update_many(data={"spend": 0.0}, where={}) - - return { - "message": "Spend for all API Keys and Teams reset successfully", - "status": "success", - } - - -@router.get( - "/global/spend/logs", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend_logs( - api_key: str = fastapi.Query( - default=None, - description="API Key to get global spend (spend per day for last 30d). Admin-only endpoint", - ) -): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to get global spend (spend per day for last 30d). Admin-only endpoint - - More efficient implementation of /spend/logs, by creating a view over the spend logs table. 
- """ - global prisma_client - if prisma_client is None: - raise ProxyException( - message="Prisma Client is not initialized", - type="internal_error", - param="None", - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - if api_key is None: - sql_query = """SELECT * FROM "MonthlyGlobalSpend" ORDER BY "date";""" - - response = await prisma_client.db.query_raw(query=sql_query) - - return response - else: - sql_query = """ - SELECT * FROM "MonthlyGlobalSpendPerKey" - WHERE "api_key" = $1 - ORDER BY "date"; - """ - - response = await prisma_client.db.query_raw(sql_query, api_key) - - return response - return - - -@router.get( - "/global/spend", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend(): - """ - [BETA] This is a beta endpoint. It will change. - - View total spend across all proxy keys - """ - global prisma_client - total_spend = 0.0 - total_proxy_budget = 0.0 - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";""" - response = await prisma_client.db.query_raw(query=sql_query) - if response is not None: - if isinstance(response, list) and len(response) > 0: - total_spend = response[0].get("total_spend", 0.0) - - return {"spend": total_spend, "max_budget": litellm.max_budget} - - -@router.get( - "/global/spend/keys", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend_keys( - limit: int = fastapi.Query( - default=None, - description="Number of keys to get. Will return Top 'n' keys.", - ) -): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to get the top 'n' keys with the highest spend, ordered by spend. 
- """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};""" - - response = await prisma_client.db.query_raw(query=sql_query) - - return response - - -@router.get( - "/global/spend/teams", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend_per_team(): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to get daily spend, grouped by `team_id` and `date` - """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - sql_query = """ - SELECT - t.team_alias as team_alias, - DATE(s."startTime") AS spend_date, - SUM(s.spend) AS total_spend - FROM - "LiteLLM_SpendLogs" s - LEFT JOIN - "LiteLLM_TeamTable" t ON s.team_id = t.team_id - WHERE - s."startTime" >= CURRENT_DATE - INTERVAL '30 days' - GROUP BY - t.team_alias, - DATE(s."startTime") - ORDER BY - spend_date; - """ - response = await prisma_client.db.query_raw(query=sql_query) - - # transform the response for the Admin UI - spend_by_date = {} - team_aliases = set() - total_spend_per_team = {} - for row in response: - row_date = row["spend_date"] - if row_date is None: - continue - team_alias = row["team_alias"] - if team_alias is None: - team_alias = "Unassigned" - team_aliases.add(team_alias) - if row_date in spend_by_date: - # get the team_id for this entry - # get the spend for this entry - spend = row["total_spend"] - spend = round(spend, 2) - current_date_entries = spend_by_date[row_date] - current_date_entries[team_alias] = spend - else: - spend = row["total_spend"] - spend = round(spend, 2) - spend_by_date[row_date] = {team_alias: spend} - - if team_alias in total_spend_per_team: - total_spend_per_team[team_alias] += spend - else: - total_spend_per_team[team_alias] = spend - - 
total_spend_per_team_ui = [] - # order the elements in total_spend_per_team by spend - total_spend_per_team = dict( - sorted(total_spend_per_team.items(), key=lambda item: item[1], reverse=True) - ) - for team_id in total_spend_per_team: - # only add first 10 elements to total_spend_per_team_ui - if len(total_spend_per_team_ui) >= 10: - break - if team_id is None: - team_id = "Unassigned" - total_spend_per_team_ui.append( - {"team_id": team_id, "total_spend": total_spend_per_team[team_id]} - ) - - # sort spend_by_date by it's key (which is a date) - - response_data = [] - for key in spend_by_date: - value = spend_by_date[key] - response_data.append({"date": key, **value}) - - return { - "daily_spend": response_data, - "teams": list(team_aliases), - "total_spend_per_team": total_spend_per_team_ui, - } - - -@router.get( - "/global/all_end_users", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_view_all_end_users(): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to just get all the unique `end_users` - """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - sql_query = """ - SELECT DISTINCT end_user FROM "LiteLLM_SpendLogs" - """ - - db_response = await prisma_client.db.query_raw(query=sql_query) - if db_response is None: - return [] - - _end_users = [] - for row in db_response: - _end_users.append(row["end_user"]) - - return {"end_users": _end_users} - - -@router.post( - "/global/spend/end_users", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to get the top 'n' keys with the highest spend, ordered by spend. 
- """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - """ - Gets the top 100 end-users for a given api key - """ - startTime = None - endTime = None - selected_api_key = None - if data is not None: - startTime = data.startTime - endTime = data.endTime - selected_api_key = data.api_key - - startTime = startTime or datetime.now() - timedelta(days=30) - endTime = endTime or datetime.now() - - sql_query = """ -SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend -FROM "LiteLLM_SpendLogs" -WHERE "startTime" >= $1::timestamp - AND "startTime" < $2::timestamp - AND ( - CASE - WHEN $3::TEXT IS NULL THEN TRUE - ELSE api_key = $3 - END - ) -GROUP BY end_user -ORDER BY total_spend DESC -LIMIT 100 - """ - response = await prisma_client.db.query_raw( - sql_query, startTime, endTime, selected_api_key - ) - - return response - - -@router.get( - "/global/spend/models", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_spend_models( - limit: int = fastapi.Query( - default=None, - description="Number of models to get. Will return Top 'n' models.", - ) -): - """ - [BETA] This is a beta endpoint. It will change. - - Use this to get the top 'n' keys with the highest spend, ordered by spend. 
- """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};""" - - response = await prisma_client.db.query_raw(query=sql_query) - - return response - - -@router.post( - "/global/predict/spend/logs", - tags=["Budget & Spend Tracking"], - dependencies=[Depends(user_api_key_auth)], - include_in_schema=False, -) -async def global_predict_spend_logs(request: Request): - from enterprise.utils import _forecast_daily_cost - - data = await request.json() - data = data.get("data") - return _forecast_daily_cost(data) - - #### INTERNAL USER MANAGEMENT #### @router.post( "/user/new", @@ -12783,5 +10756,6 @@ def cleanup_router_config_variables(): app.include_router(router) +app.include_router(spend_management_router) app.include_router(team_router) app.include_router(caching_router) diff --git a/litellm/proxy/spend_management_endpoints.py b/litellm/proxy/spend_management_endpoints.py new file mode 100644 index 000000000..94c491cd5 --- /dev/null +++ b/litellm/proxy/spend_management_endpoints.py @@ -0,0 +1,1832 @@ +#### SPEND MANAGEMENT ##### +from typing import Optional, List +import litellm +from litellm._logging import verbose_proxy_logger +from datetime import datetime, timedelta, timezone +from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +import fastapi +from fastapi import Depends, Request, APIRouter, Header, status +from fastapi import HTTPException +from litellm.proxy._types import * + +router = APIRouter( + prefix="/spend", + tags=["Budget & Spend Tracking"], +) + + +@router.get( + "/spend/keys", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + include_in_schema=False, +) +async def spend_key_fn(): + """ + View all keys created, ordered by spend + + Example Request: + ``` + curl -X GET "http://0.0.0.0:8000/spend/keys" \ +-H "Authorization: Bearer sk-1234" + ``` + """ + + from 
litellm.proxy.proxy_server import prisma_client + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + key_info = await prisma_client.get_data(table_name="key", query_type="find_all") + return key_info + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": str(e)}, + ) + + +@router.get( + "/spend/users", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + include_in_schema=False, +) +async def spend_user_fn( + user_id: Optional[str] = fastapi.Query( + default=None, + description="Get User Table row for user_id", + ), +): + """ + View all users created, ordered by spend + + Example Request: + ``` + curl -X GET "http://0.0.0.0:8000/spend/users" \ +-H "Authorization: Bearer sk-1234" + ``` + + View User Table row for user_id + ``` + curl -X GET "http://0.0.0.0:8000/spend/users?user_id=1234" \ +-H "Authorization: Bearer sk-1234" + ``` + """ + from litellm.proxy.proxy_server import prisma_client + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + if user_id is not None: + user_info = await prisma_client.get_data( + table_name="user", query_type="find_unique", user_id=user_id + ) + return [user_info] + else: + user_info = await prisma_client.get_data( + table_name="user", query_type="find_all" + ) + + return user_info + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": str(e)}, + ) + + +@router.get( + "/spend/tags", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + responses={ + 200: {"model": List[LiteLLM_SpendLogs]}, + }, +) +async def view_spend_tags( + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing key spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to view key spend", + ), +): + """ + LiteLLM Enterprise - View Spend Per Request Tag + + Example Request: + ``` + curl -X GET "http://0.0.0.0:8000/spend/tags" \ +-H "Authorization: Bearer sk-1234" + ``` + + Spend with Start Date and End Date + ``` + curl -X GET "http://0.0.0.0:8000/spend/tags?start_date=2022-01-01&end_date=2022-02-01" \ +-H "Authorization: Bearer sk-1234" + ``` + """ + + from enterprise.utils import get_spend_by_tags + from litellm.proxy.proxy_server import prisma_client + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + # run the following SQL query on prisma + """ + SELECT + jsonb_array_elements_text(request_tags) AS individual_request_tag, + COUNT(*) AS log_count, + SUM(spend) AS total_spend + FROM "LiteLLM_SpendLogs" + GROUP BY individual_request_tag; + """ + response = await get_spend_by_tags( + start_date=start_date, end_date=end_date, prisma_client=prisma_client + ) + + return response + except Exception as e: + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "detail", f"/spend/tags Error({str(e)})"), + type="internal_error", + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), + ) + elif isinstance(e, ProxyException): + raise e + raise ProxyException( + message="/spend/tags Error" + str(e), + type="internal_error", + param=getattr(e, "param", "None"), + code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@router.get( + "/global/activity", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + responses={ + 200: {"model": List[LiteLLM_SpendLogs]}, + }, + include_in_schema=False, +) +async def get_global_activity( + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to view spend", + ), +): + """ + Get number of API Requests, total tokens through proxy + + { + "daily_data": [ + const chartdata = [ + { + date: 'Jan 22', + api_requests: 10, + total_tokens: 2000 + }, + { + date: 'Jan 23', + api_requests: 10, + total_tokens: 12 + }, + ], + "sum_api_requests": 20, + "sum_total_tokens": 2012 + } + """ + from collections import defaultdict + + if start_date is None or end_date is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": "Please provide start_date and 
end_date"}, + ) + + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") + + from litellm.proxy.proxy_server import prisma_client, llm_router + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + sql_query = """ + SELECT + date_trunc('day', "startTime") AS date, + COUNT(*) AS api_requests, + SUM(total_tokens) AS total_tokens + FROM "LiteLLM_SpendLogs" + WHERE "startTime" BETWEEN $1::date AND $2::date + interval '1 day' + GROUP BY date_trunc('day', "startTime") + """ + db_response = await prisma_client.db.query_raw( + sql_query, start_date_obj, end_date_obj + ) + + if db_response is None: + return [] + + sum_api_requests = 0 + sum_total_tokens = 0 + daily_data = [] + for row in db_response: + # cast date to datetime + _date_obj = datetime.fromisoformat(row["date"]) + row["date"] = _date_obj.strftime("%b %d") + + daily_data.append(row) + sum_api_requests += row.get("api_requests", 0) + sum_total_tokens += row.get("total_tokens", 0) + + # sort daily_data by date + daily_data = sorted(daily_data, key=lambda x: x["date"]) + + data_to_return = { + "daily_data": daily_data, + "sum_api_requests": sum_api_requests, + "sum_total_tokens": sum_total_tokens, + } + + return data_to_return + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": str(e)}, + ) + + +@router.get( + "/global/activity/model", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + responses={ + 200: {"model": List[LiteLLM_SpendLogs]}, + }, + include_in_schema=False, +) +async def get_global_activity_model( + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to 
view spend", + ), +): + """ + Get number of API Requests, total tokens through proxy - Grouped by MODEL + + [ + { + "model": "gpt-4", + "daily_data": [ + const chartdata = [ + { + date: 'Jan 22', + api_requests: 10, + total_tokens: 2000 + }, + { + date: 'Jan 23', + api_requests: 10, + total_tokens: 12 + }, + ], + "sum_api_requests": 20, + "sum_total_tokens": 2012 + + }, + { + "model": "azure/gpt-4-turbo", + "daily_data": [ + const chartdata = [ + { + date: 'Jan 22', + api_requests: 10, + total_tokens: 2000 + }, + { + date: 'Jan 23', + api_requests: 10, + total_tokens: 12 + }, + ], + "sum_api_requests": 20, + "sum_total_tokens": 2012 + + }, + ] + """ + from collections import defaultdict + + if start_date is None or end_date is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": "Please provide start_date and end_date"}, + ) + + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") + + from litellm.proxy.proxy_server import prisma_client, llm_router, premium_user + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
@router.get(
    "/global/activity/exceptions/deployment",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
    include_in_schema=False,
)
async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
        description="Filter by model group",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get the number of 429 errors for a model group, grouped by `api_base`.

    Only the 10 `api_base` values with the most 429 errors are returned,
    highest first. `daily_data` is in chronological order.

    Parameters:
    - model_group: value matched against `model_group` in "LiteLLM_ErrorLogs"
    - start_date / end_date: `YYYY-MM-DD`, both required, end date inclusive

    Example response:
    ```
    [
        {
            "api_base": "https://azure-us-east-1.openai.azure.com/",
            "daily_data": [
                {"api_base": "...", "date": "Jan 22", "num_rate_limit_exceptions": 10},
                {"api_base": "...", "date": "Jan 23", "num_rate_limit_exceptions": 12}
            ],
            "sum_num_rate_limit_exceptions": 22
        }
    ]
    ```

    Raises:
    - HTTPException(400): a date is missing or not in `YYYY-MM-DD` format
    - HTTPException(500): the database is not connected or the query fails
    """
    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; without this guard the ValueError
    # escapes the handler and surfaces as an opaque 500.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    from litellm.proxy.proxy_server import prisma_client

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        sql_query = """
        SELECT
            api_base,
            date_trunc('day', "startTime")::date AS date,
            COUNT(*) AS num_rate_limit_exceptions
        FROM
            "LiteLLM_ErrorLogs"
        WHERE
            "startTime" >= $1::date
            AND "startTime" < ($2::date + INTERVAL '1 day')
            AND model_group = $3
            AND status_code = '429'
        GROUP BY
            api_base,
            date_trunc('day', "startTime")
        ORDER BY
            date;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, model_group
        )
        if db_response is None:
            return []

        # {api_base: {"daily_data": [(day, row), ...], "sum_num_rate_limit_exceptions": int}}
        per_api_base: dict = {}
        for row in db_response:
            entry = per_api_base.setdefault(
                row["api_base"],
                {"daily_data": [], "sum_num_rate_limit_exceptions": 0},
            )
            # Keep the parsed date next to the row so the later sort is
            # chronological. Sorting on the "%b %d" label is alphabetical and
            # puts "Feb 01" before "Jan 31".
            entry["daily_data"].append((datetime.fromisoformat(row["date"]), row))
            entry["sum_num_rate_limit_exceptions"] += row.get(
                "num_rate_limit_exceptions", 0
            )

        # Top 10 api_bases by number of rate limit errors.
        top_api_bases = sorted(
            per_api_base.items(),
            key=lambda item: item[1]["sum_num_rate_limit_exceptions"],
            reverse=True,
        )[:10]

        response = []
        for api_base, entry in top_api_bases:
            daily_data = []
            for day, row in sorted(entry["daily_data"], key=lambda pair: pair[0]):
                row["date"] = day.strftime("%b %d")  # label format used by the UI
                daily_data.append(row)

            response.append(
                {
                    "api_base": api_base,
                    "daily_data": daily_data,
                    "sum_num_rate_limit_exceptions": entry[
                        "sum_num_rate_limit_exceptions"
                    ],
                }
            )

        return response

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": str(e)},
        )
{"model": List[LiteLLM_SpendLogs]}, + }, + include_in_schema=False, +) +async def get_global_activity_exceptions( + model_group: str = fastapi.Query( + description="Filter by model group", + ), + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to view spend", + ), +): + """ + Get number of API Requests, total tokens through proxy + + { + "daily_data": [ + const chartdata = [ + { + date: 'Jan 22', + num_rate_limit_exceptions: 10, + }, + { + date: 'Jan 23', + num_rate_limit_exceptions: 10, + }, + ], + "sum_api_exceptions": 20, + } + """ + from collections import defaultdict + + if start_date is None or end_date is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": "Please provide start_date and end_date"}, + ) + + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") + + from litellm.proxy.proxy_server import prisma_client, llm_router + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
@router.get(
    "/global/spend/provider",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def get_global_spend_provider(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view spend",
    ),
):
    """
    Get a breakdown of spend per LLM provider.

    Spend is summed per `model_id` from "LiteLLM_SpendLogs", then each
    `model_id` is mapped to a provider through the in-memory router. Rows
    whose `model_id` has no deployment in the router (or when no router is
    configured) are left out of the result.

    Parameters:
    - start_date / end_date: `YYYY-MM-DD`, both required

    Example response:
    ```
    [
        {"provider": "Azure OpenAI", "spend": 20},
        {"provider": "OpenAI", "spend": 10},
        {"provider": "VertexAI", "spend": 30}
    ]
    ```

    Raises:
    - HTTPException(400): a date is missing or malformed, the database is
      not connected, or the query fails
    """
    from collections import defaultdict

    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    # A malformed date is a client error; without this guard the ValueError
    # escapes the handler and surfaces as an opaque 500.
    try:
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "start_date and end_date must be in YYYY-MM-DD format"},
        )

    from litellm.proxy.proxy_server import prisma_client, llm_router

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        # NOTE(review): BETWEEN $1::date AND $2::date stops at midnight of
        # end_date, unlike the activity endpoints that add INTERVAL '1 day'.
        # Left unchanged here; confirm whether end_date should be inclusive.
        sql_query = """

        SELECT
            model_id,
            SUM(spend) AS spend
        FROM "LiteLLM_SpendLogs"
        WHERE "startTime" BETWEEN $1::date AND $2::date AND length(model_id) > 0
        GROUP BY model_id
        """

        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        ###################################
        # Convert model_id -> to Provider #
        ###################################

        # we use the in memory router for this
        provider_spend_mapping: defaultdict = defaultdict(int)
        if llm_router is not None:
            for row in db_response:
                _deployment = llm_router.get_deployment(model_id=row["model_id"])
                if _deployment is None:
                    continue
                try:
                    _, _provider, _, _ = litellm.get_llm_provider(
                        model=_deployment.litellm_params.model,
                        custom_llm_provider=_deployment.litellm_params.custom_llm_provider,
                        api_base=_deployment.litellm_params.api_base,
                        litellm_params=_deployment.litellm_params,
                    )
                    provider_spend_mapping[_provider] += row["spend"]
                except Exception as e:
                    # Best effort: one unresolvable deployment must not fail
                    # the whole report, but the skip should be visible in logs.
                    verbose_proxy_logger.debug(
                        "get_global_spend_provider: skipping model_id={}, could not resolve provider: {}".format(
                            row["model_id"], str(e)
                        )
                    )

        return [
            {"provider": provider, "spend": spend}
            for provider, spend in provider_spend_mapping.items()
        ]

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": str(e)},
        )
tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + responses={ + 200: {"model": List[LiteLLM_SpendLogs]}, + }, +) +async def get_global_spend_report( + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to view spend", + ), + group_by: Optional[Literal["team", "customer"]] = fastapi.Query( + default="team", + description="Group spend by internal team or customer", + ), +): + """ + Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model + [ + { + "group-by-day": "2024-05-10", + "teams": [ + { + "team_name": "team-1" + "spend": 10, + "keys": [ + "key": "1213", + "usage": { + "model-1": { + "cost": 12.50, + "input_tokens": 1000, + "output_tokens": 5000, + "requests": 100 + }, + "audio-modelname1": { + "cost": 25.50, + "seconds": 25, + "requests": 50 + }, + } + } + ] + ] + } + """ + if start_date is None or end_date is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": "Please provide start_date and end_date"}, + ) + + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") + + from litellm.proxy.proxy_server import prisma_client + + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + if group_by == "team": + # first get data from spend logs -> SpendByModelApiKey + # then read data from "SpendByModelApiKey" to format the response obj + sql_query = """ + + WITH SpendByModelApiKey AS ( + SELECT + date_trunc('day', sl."startTime") AS group_by_day, + COALESCE(tt.team_alias, 'Unassigned Team') AS team_name, + sl.model, + sl.api_key, + SUM(sl.spend) AS model_api_spend, + SUM(sl.total_tokens) AS model_api_tokens + FROM + "LiteLLM_SpendLogs" sl + LEFT JOIN + "LiteLLM_TeamTable" tt + ON + sl.team_id = tt.team_id + WHERE + sl."startTime" BETWEEN $1::date AND $2::date + GROUP BY + date_trunc('day', sl."startTime"), + tt.team_alias, + sl.model, + sl.api_key + ) + SELECT + group_by_day, + jsonb_agg(jsonb_build_object( + 'team_name', team_name, + 'total_spend', total_spend, + 'metadata', metadata + )) AS teams + FROM ( + SELECT + group_by_day, + team_name, + SUM(model_api_spend) AS total_spend, + jsonb_agg(jsonb_build_object( + 'model', model, + 'api_key', api_key, + 'spend', model_api_spend, + 'total_tokens', model_api_tokens + )) AS metadata + FROM + SpendByModelApiKey + GROUP BY + group_by_day, + team_name + ) AS aggregated + GROUP BY + group_by_day + ORDER BY + group_by_day; + """ + + db_response = await prisma_client.db.query_raw( + sql_query, start_date_obj, end_date_obj + ) + if db_response is None: + return [] + + return db_response + + elif group_by == "customer": + sql_query = """ + + WITH SpendByModelApiKey AS ( + SELECT + date_trunc('day', sl."startTime") AS group_by_day, + sl.end_user AS customer, + sl.model, + sl.api_key, + SUM(sl.spend) AS model_api_spend, + SUM(sl.total_tokens) AS model_api_tokens + FROM + "LiteLLM_SpendLogs" sl + WHERE + sl."startTime" BETWEEN $1::date AND $2::date + GROUP BY + date_trunc('day', sl."startTime"), + customer, + sl.model, + sl.api_key + ) + SELECT + group_by_day, + jsonb_agg(jsonb_build_object( + 
@router.get(
    "/global/spend/all_tag_names",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def global_get_all_tag_names():
    """
    List every distinct request tag found in "LiteLLM_SpendLogs".

    Returns `{"tag_names": [...]}`, or `[]` when the query yields `None`.
    Any failure, including a missing database connection, is re-raised as a
    `ProxyException`.
    """
    try:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        distinct_tags_query = """
        SELECT DISTINCT
            jsonb_array_elements_text(request_tags) AS individual_request_tag
        FROM "LiteLLM_SpendLogs";
        """

        rows = await prisma_client.db.query_raw(distinct_tags_query)
        if rows is None:
            return []

        return {"tag_names": [row.get("individual_request_tag") for row in rows]}

    except HTTPException as http_err:
        # Checked first, same precedence as the isinstance chain it replaces.
        raise ProxyException(
            message=getattr(
                http_err, "detail", f"/spend/all_tag_names Error({str(http_err)})"
            ),
            type="internal_error",
            param=getattr(http_err, "param", "None"),
            code=getattr(
                http_err, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR
            ),
        )
    except ProxyException:
        raise
    except Exception as err:
        raise ProxyException(
            message="/spend/all_tag_names Error" + str(err),
            type="internal_error",
            param=getattr(err, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
async def _get_spend_report_for_time_range(
    start_date: str,
    end_date: str,
):
    """
    Fetch total spend per team and per request tag for a date range.

    Both bounds are compared against `"startTime"::DATE`, so the range is
    inclusive of `start_date` and `end_date`.

    Args:
        start_date: first day of the range; cast to `date` by the query.
        end_date: last day of the range; cast to `date` by the query.

    Returns:
        `(spend_per_team, spend_per_tag)`, each the raw query result ordered
        by `total_spend` descending, or `None` when the database is not
        connected or a query fails (the failure is logged, not raised).
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        verbose_proxy_logger.error(
            "Database not connected. Connect a database to your proxy for weekly, monthly spend reports"
        )
        return None

    try:
        # spend per team for the requested range
        sql_query = """
        SELECT
            t.team_alias,
            SUM(s.spend) AS total_spend
        FROM
            "LiteLLM_SpendLogs" s
        LEFT JOIN
            "LiteLLM_TeamTable" t ON s.team_id = t.team_id
        WHERE
            s."startTime"::DATE >= $1::date AND s."startTime"::DATE <= $2::date
        GROUP BY
            t.team_alias
        ORDER BY
            total_spend DESC;
        """
        response = await prisma_client.db.query_raw(sql_query, start_date, end_date)

        # spend per request tag for the same range
        sql_query = """
        SELECT
            jsonb_array_elements_text(request_tags) AS individual_request_tag,
            SUM(spend) AS total_spend
        FROM "LiteLLM_SpendLogs"
        WHERE "startTime"::DATE >= $1::date AND "startTime"::DATE <= $2::date
        GROUP BY individual_request_tag
        ORDER BY total_spend DESC;
        """

        spend_per_tag = await prisma_client.db.query_raw(
            sql_query, start_date, end_date
        )

        return response, spend_per_tag
    except Exception as e:
        # The log line now names this function, so failures can be traced
        # to the right place.
        verbose_proxy_logger.error(
            "Exception in _get_spend_report_for_time_range {}".format(str(e))
        )  # noqa
        return None
@router.get(
    "/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
)
async def view_spend_logs(
    api_key: Optional[str] = fastapi.Query(
        default=None,
        description="Get spend logs based on api key",
    ),
    user_id: Optional[str] = fastapi.Query(
        default=None,
        description="Get spend logs based on user_id",
    ),
    request_id: Optional[str] = fastapi.Query(
        default=None,
        description="request_id to get spend logs for specific request_id. If none passed then pass spend logs for all requests",
    ),
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing key spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view key spend",
    ),
):
    """
    View spend logs, optionally filtered by api_key, request_id or user_id.

    Behaviour by parameters:
    - `start_date` and `end_date` both given (`YYYY-MM-DD`): returns one entry
      per day with total `spend`, spend per api key, and `users` / `models`
      breakdowns. Days after the last day with data, up to `end_date`, are
      added with zero spend. At most one of api_key / request_id / user_id is
      applied as a filter, in that order of precedence.
    - otherwise the first of `api_key`, `request_id`, `user_id` that is set
      selects the raw spend logs to return; with none set, all spend logs are
      returned.

    Example Request for all logs
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific request_id
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?request_id=chatcmpl-6dcb2540-d3d7-4e49-bb27-291f863f112e" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific api_key
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?api_key=sk-Fn8Ej39NkBQmUagFEoUWPQ" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Request for specific user_id
    ```
    curl -X GET "http://0.0.0.0:8000/spend/logs?user_id=ishaan@berri.ai" \
    -H "Authorization: Bearer sk-1234"
    ```

    Raises:
    - ProxyException: on any failure, including a missing database connection
    """
    from litellm.proxy.proxy_server import prisma_client

    try:
        verbose_proxy_logger.debug("inside view_spend_logs")
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

        if (
            start_date is not None
            and isinstance(start_date, str)
            and end_date is not None
            and isinstance(end_date, str)
        ):
            # Convert the date strings to datetime objects
            start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
            end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

            filter_query = {
                "startTime": {
                    "gte": start_date_obj,  # Greater than or equal to Start Date
                    "lte": end_date_obj,  # Less than or equal to End Date
                }
            }

            # NOTE(review): the api_key is used as given here, while the
            # branch without dates hashes "sk-" keys before the lookup.
            # Confirm which form "LiteLLM_SpendLogs".api_key stores.
            if api_key is not None and isinstance(api_key, str):
                filter_query["api_key"] = api_key  # type: ignore
            elif request_id is not None and isinstance(request_id, str):
                filter_query["request_id"] = request_id  # type: ignore
            elif user_id is not None and isinstance(user_id, str):
                filter_query["user"] = user_id  # type: ignore

            response = await prisma_client.db.litellm_spendlogs.group_by(
                by=["api_key", "user", "model", "startTime"],
                where=filter_query,  # type: ignore
                sum={
                    "spend": True,
                },
            )

            if not (
                isinstance(response, list)
                and len(response) > 0
                and isinstance(response[0], dict)
            ):
                return response

            # {date: {"spend": float, <api_key>: float, "users": {...}, "models": {...}}}
            result: dict = {}
            for record in response:
                dt_object = datetime.strptime(
                    str(record["startTime"]), "%Y-%m-%dT%H:%M:%S.%fZ"
                )  # type: ignore
                date = dt_object.date()
                if date not in result:
                    result[date] = {"users": {}, "models": {}}

                # Distinct local names: the loop must not rebind the
                # api_key / user_id request parameters.
                record_api_key = record["api_key"]
                record_user = record["user"]
                record_model = record["model"]
                record_spend = record["_sum"]["spend"]

                day = result[date]
                day["spend"] = day.get("spend", 0) + record_spend
                day[record_api_key] = day.get(record_api_key, 0) + record_spend
                day["users"][record_user] = (
                    day["users"].get(record_user, 0) + record_spend
                )
                day["models"][record_model] = (
                    day["models"].get(record_model, 0) + record_spend
                )

            return_list = []
            final_date = None
            for k, v in sorted(result.items()):
                return_list.append({**v, "startTime": k})
                final_date = k

            # Pad the days between the last day with data and end_date.
            end_date_date = end_date_obj.date()
            if final_date is not None and final_date < end_date_date:
                current_date = final_date + timedelta(days=1)
                while current_date <= end_date_date:
                    return_list.append(
                        {
                            "startTime": current_date,
                            "spend": 0,
                            "users": {},
                            "models": {},
                        }
                    )  # If no data, will stay as zero
                    current_date += timedelta(days=1)  # Move on to the next day

            return return_list

        elif api_key is not None and isinstance(api_key, str):
            if api_key.startswith("sk-"):
                hashed_token = prisma_client.hash_token(token=api_key)
            else:
                hashed_token = api_key
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_all",
                key_val={"key": "api_key", "value": hashed_token},
            )
            if isinstance(spend_log, list):
                return spend_log
            return [spend_log]
        elif request_id is not None:
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_unique",
                key_val={"key": "request_id", "value": request_id},
            )
            return [spend_log]
        elif user_id is not None:
            spend_log = await prisma_client.get_data(
                table_name="spend",
                query_type="find_all",
                key_val={"key": "user", "value": user_id},
            )
            if isinstance(spend_log, list):
                return spend_log
            return [spend_log]
        else:
            spend_logs = await prisma_client.get_data(
                table_name="spend", query_type="find_all"
            )
            # Return the name that was just assigned. Returning `spend_log`
            # here is unbound, so the unfiltered request always fails.
            return spend_logs

    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
                type="internal_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="/spend/logs Error" + str(e),
            type="internal_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.get(
    "/global/spend/keys",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_keys(
    limit: Optional[int] = fastapi.Query(
        default=None,
        description="Number of keys to get. Will return Top 'n' keys.",
    )
):
    """
    [BETA] This is a beta endpoint. It will change.

    Return rows from the "Last30dKeysBySpend" view, at most `limit` of them.
    Row order is whatever the view yields; no ORDER BY is applied here.

    Parameters:
    - limit: maximum number of rows; when omitted, every row is returned

    Raises:
    - HTTPException(500): no database is connected
    - HTTPException(400): `limit` is negative
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    # With no limit, omit the LIMIT clause; interpolating the default
    # produces "LIMIT None", which is invalid SQL.
    if limit is None:
        sql_query = """SELECT * FROM "Last30dKeysBySpend";"""
        return await prisma_client.db.query_raw(query=sql_query)

    if limit < 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "limit must be a non-negative integer"},
        )

    # Bind the value as a parameter rather than formatting it into the SQL text.
    sql_query = """SELECT * FROM "Last30dKeysBySpend" LIMIT $1;"""
    return await prisma_client.db.query_raw(sql_query, limit)
@router.get(
    "/global/all_end_users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_view_all_end_users():
    """
    [BETA] This is a beta endpoint. It will change.

    List the distinct `end_user` values recorded in "LiteLLM_SpendLogs".

    Returns `{"end_users": [...]}`, or `[]` when the query yields `None`.
    Raises HTTPException(500) when no database is connected.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    distinct_end_users_query = """
    SELECT DISTINCT end_user FROM "LiteLLM_SpendLogs"
    """

    rows = await prisma_client.db.query_raw(query=distinct_end_users_query)
    if rows is None:
        return []

    return {"end_users": [row["end_user"] for row in rows]}
+ + Use this to get the top 'n' keys with the highest spend, ordered by spend. + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException(status_code=500, detail={"error": "No db connected"}) + + """ + Gets the top 100 end-users for a given api key + """ + startTime = None + endTime = None + selected_api_key = None + if data is not None: + startTime = data.startTime + endTime = data.endTime + selected_api_key = data.api_key + + startTime = startTime or datetime.now() - timedelta(days=30) + endTime = endTime or datetime.now() + + sql_query = """ +SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend +FROM "LiteLLM_SpendLogs" +WHERE "startTime" >= $1::timestamp + AND "startTime" < $2::timestamp + AND ( + CASE + WHEN $3::TEXT IS NULL THEN TRUE + ELSE api_key = $3 + END + ) +GROUP BY end_user +ORDER BY total_spend DESC +LIMIT 100 + """ + response = await prisma_client.db.query_raw( + sql_query, startTime, endTime, selected_api_key + ) + + return response + + +@router.get( + "/global/spend/models", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], + include_in_schema=False, +) +async def global_spend_models( + limit: int = fastapi.Query( + default=None, + description="Number of models to get. Will return Top 'n' models.", + ) +): + """ + [BETA] This is a beta endpoint. It will change. + + Use this to get the top 'n' keys with the highest spend, ordered by spend. 
@router.get(
    "/global/spend/models",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_spend_models(
    limit: Optional[int] = fastapi.Query(
        default=None,
        description="Number of models to get. Will return Top 'n' models.",
    )
):
    """
    [BETA] This is a beta endpoint. It will change.

    Use this to get rows from the "Last30dModelsBySpend" view, optionally
    restricted to the first `limit` rows.

    Args:
        limit: Maximum number of rows to return. When omitted, every row of
            the view is returned.

    Returns:
        The raw rows returned by the database query.

    Raises:
        HTTPException: status 500 when no database client is connected;
            status 400 when `limit` is negative.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if limit is None:
        # No limit requested: interpolating None would yield "LIMIT None",
        # which is invalid SQL, so the clause is left out entirely.
        return await prisma_client.db.query_raw(
            query='SELECT * FROM "Last30dModelsBySpend";'
        )

    if limit < 0:
        raise HTTPException(
            status_code=400,
            detail={"error": "limit must be a non-negative integer"},
        )

    # Bind the limit as a query parameter instead of formatting it into the
    # SQL text.
    return await prisma_client.db.query_raw(
        'SELECT * FROM "Last30dModelsBySpend" LIMIT $1;', limit
    )


@router.post(
    "/global/predict/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def global_predict_spend_logs(request: Request):
    """
    Forecast daily cost from the spend data sent in the request body.

    Reads the JSON body, takes its "data" entry and hands it to
    `enterprise.utils._forecast_daily_cost`, returning that result unchanged.
    """
    # Imported at call time; presumably so the module still loads when the
    # enterprise package is unavailable -- TODO confirm.
    from enterprise.utils import _forecast_daily_cost

    payload = await request.json()
    return _forecast_daily_cost(payload.get("data"))
spend_key_fn, + view_spend_logs, +) from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend from litellm._logging import verbose_proxy_logger diff --git a/litellm/tests/test_update_spend.py b/litellm/tests/test_update_spend.py index 529e90e3c..06b57d972 100644 --- a/litellm/tests/test_update_spend.py +++ b/litellm/tests/test_update_spend.py @@ -34,11 +34,13 @@ from litellm.proxy.proxy_server import ( update_key_fn, generate_key_fn, generate_key_helper_fn, + user_info, + block_user, +) +from litellm.proxy.spend_management_endpoints import ( spend_user_fn, spend_key_fn, view_spend_logs, - user_info, - block_user, ) from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend from litellm._logging import verbose_proxy_logger