Merge branch 'main' into litellm_redis_cache_usage

2025-04-26 11:14:04 +00:00 · 2024-06-13 22:07:21 -07:00 · 2024-06-13 22:07:21 -07:00 · c373f104cc
commit c373f104cc
parent af7e7cdc93 f3fd84908b
131 changed files with 3117 additions and 476 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -879,6 +879,7 @@ async def user_api_key_auth(

        ## check for cache hit (In-Memory Cache)
        original_api_key = api_key  # (Patch: For DynamoDB Backwards Compatibility)
+        _user_role = None
        if api_key.startswith("sk-"):
            api_key = hash_token(token=api_key)
        valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache(  # type: ignore
@ -1512,7 +1513,7 @@ async def user_api_key_auth(
                ):
                    return UserAPIKeyAuth(
                        api_key=api_key,
-                        user_role="app_owner",
+                        user_role=_user_role,
                        parent_otel_span=parent_otel_span,
                        **valid_token_dict,
                    )
@ -6649,7 +6650,7 @@ async def generate_key_fn(

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
-            _updated_values = json.dumps(response)
+            _updated_values = json.dumps(response, default=str)
            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
@ -6754,10 +6755,10 @@ async def update_key_fn(

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
-            _updated_values = json.dumps(data_json)
+            _updated_values = json.dumps(data_json, default=str)

            _before_value = existing_key_row.json(exclude_none=True)
-            _before_value = json.dumps(_before_value)
+            _before_value = json.dumps(_before_value, default=str)

            asyncio.create_task(
                create_audit_log_for_update(
@ -6853,7 +6854,7 @@ async def delete_key_fn(
                )

                key_row = key_row.json(exclude_none=True)
-                _key_row = json.dumps(key_row)
+                _key_row = json.dumps(key_row, default=str)

                asyncio.create_task(
                    create_audit_log_for_update(
@ -7057,6 +7058,7 @@ async def info_key_fn(
    "/spend/keys",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def spend_key_fn():
    """
@ -7089,6 +7091,7 @@ async def spend_key_fn():
    "/spend/users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def spend_user_fn(
    user_id: Optional[str] = fastapi.Query(
@ -7219,6 +7222,7 @@ async def view_spend_tags(
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
+    include_in_schema=False,
 )
 async def get_global_activity(
    start_date: Optional[str] = fastapi.Query(
@ -7322,6 +7326,7 @@ async def get_global_activity(
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
+    include_in_schema=False,
 )
 async def get_global_activity_model(
    start_date: Optional[str] = fastapi.Query(
@ -7468,6 +7473,7 @@ async def get_global_activity_model(
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
+    include_in_schema=False,
 )
 async def get_global_activity_exceptions_per_deployment(
    model_group: str = fastapi.Query(
@ -7620,6 +7626,7 @@ async def get_global_activity_exceptions_per_deployment(
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
+    include_in_schema=False,
 )
 async def get_global_activity_exceptions(
    model_group: str = fastapi.Query(
@ -7830,7 +7837,6 @@ async def get_global_spend_provider(
    "/global/spend/report",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
-    include_in_schema=False,
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
    },
@ -8530,6 +8536,7 @@ async def global_spend_reset():
    "/global/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend_logs(
    api_key: str = fastapi.Query(
@ -8575,6 +8582,7 @@ async def global_spend_logs(
    "/global/spend",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend():
    """
@ -8601,6 +8609,7 @@ async def global_spend():
    "/global/spend/keys",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend_keys(
    limit: int = fastapi.Query(
@ -8628,6 +8637,7 @@ async def global_spend_keys(
    "/global/spend/teams",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend_per_team():
    """
@ -8752,6 +8762,7 @@ async def global_view_all_end_users():
    "/global/spend/end_users",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
    """
@ -8804,6 +8815,7 @@ LIMIT 100
    "/global/spend/models",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_spend_models(
    limit: int = fastapi.Query(
@ -8832,6 +8844,7 @@ async def global_spend_models(
    "/global/predict/spend/logs",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def global_predict_spend_logs(request: Request):
    from enterprise.utils import _forecast_daily_cost
@ -8863,7 +8876,7 @@ async def new_user(data: NewUserRequest):
    - organization_id: Optional[str] - specify the org a user belongs to.
    - user_email: Optional[str] - Specify a user email.
    - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
-    - user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
+    - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/blob/main/litellm/proxy/_types.py#L20`
    - max_budget: Optional[float] - Specify max budget for a given user.
    - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
    - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
@ -9947,16 +9960,18 @@ async def new_team(
    """
    Allow users to create a new team. Apply user permissions to their team.

-    [ASK FOR HELP](https://github.com/BerriAI/litellm/issues)
+    👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
+

    Parameters:
    - team_alias: Optional[str] - User defined team alias
    - team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
    - members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
-    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
+    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
+    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

@ -9981,6 +9996,21 @@ async def new_team(
        {"role": "user", "user_id": "user-2434"}]
    }'

+    ```
+
+    ```
+    curl --location 'http://0.0.0.0:4000/team/new' \
+
+    --header 'Authorization: Bearer sk-1234' \
+
+    --header 'Content-Type: application/json' \
+
+    --data '{
+                "team_alias": "QA Prod Bot", 
+                "max_budget": 0.000000001, 
+                "budget_duration": "1d"
+            }'
+
    ```
    """
    global prisma_client
@ -10110,7 +10140,8 @@ async def new_team(
    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _updated_values = complete_team_data.json(exclude_none=True)
-        _updated_values = json.dumps(_updated_values)
+
+        _updated_values = json.dumps(_updated_values, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
@ -10174,6 +10205,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@management_endpoint_wrapper
 async def update_team(
    data: UpdateTeamRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
@ -10192,6 +10224,7 @@ async def update_team(
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
+    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

@ -10209,6 +10242,20 @@ async def update_team(
        "tpm_limit": 100
    }'
    ```
+
+    Example - Update Team `max_budget`
+    ```
+    curl --location 'http://0.0.0.0:4000/team/update' \
+
+    --header 'Authorization: Bearer sk-1234' \
+
+    --header 'Content-Type: application/json' \
+
+    --data-raw '{
+        "team_id": "litellm-test-client-id-new",
+        "max_budget": 10
+    }'
+    ```
    """
    global prisma_client

@ -10248,8 +10295,8 @@ async def update_team(
    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _before_value = existing_team_row.json(exclude_none=True)
-        _before_value = json.dumps(_before_value)
-        _after_value: str = json.dumps(updated_kv)
+        _before_value = json.dumps(_before_value, default=str)
+        _after_value: str = json.dumps(updated_kv, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
@ -11408,7 +11455,7 @@ async def model_info_v2(
    for _model in all_models:
        # provided model_info in config.yaml
        model_info = _model.get("model_info", {})
-        if debug == True:
+        if debug is True:
            _openai_client = "None"
            if llm_router is not None:
                _openai_client = (
@ -11433,7 +11480,7 @@ async def model_info_v2(
            litellm_model = litellm_params.get("model", None)
            try:
                litellm_model_info = litellm.get_model_info(model=litellm_model)
-            except:
+            except Exception:
                litellm_model_info = {}
        # 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
        if litellm_model_info == {}:
@ -11444,8 +11491,10 @@ async def model_info_v2(
            if len(split_model) > 0:
                litellm_model = split_model[-1]
            try:
-                litellm_model_info = litellm.get_model_info(model=litellm_model)
-            except:
+                litellm_model_info = litellm.get_model_info(
+                    model=litellm_model, custom_llm_provider=split_model[0]
+                )
+            except Exception:
                litellm_model_info = {}
        for k, v in litellm_model_info.items():
            if k not in model_info:
@ -11956,7 +12005,9 @@ async def model_info_v1(
            if len(split_model) > 0:
                litellm_model = split_model[-1]
            try:
-                litellm_model_info = litellm.get_model_info(model=litellm_model)
+                litellm_model_info = litellm.get_model_info(
+                    model=litellm_model, custom_llm_provider=split_model[0]
+                )
            except:
                litellm_model_info = {}
        for k, v in litellm_model_info.items():
@ -12223,6 +12274,7 @@ async def alerting_settings(
    "/queue/chat/completions",
    tags=["experimental"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def async_queue_request(
    request: Request,
@ -12334,18 +12386,10 @@ async def async_queue_request(
        )


-@router.get(
-    "/ollama_logs", dependencies=[Depends(user_api_key_auth)], tags=["experimental"]
-)
-async def retrieve_server_log(request: Request):
-    filepath = os.path.expanduser("~/.ollama/logs/server.log")
-    return FileResponse(filepath)
-
-
 #### LOGIN ENDPOINTS ####


-@app.get("/sso/key/generate", tags=["experimental"])
+@app.get("/sso/key/generate", tags=["experimental"], include_in_schema=False)
 async def google_login(request: Request):
    """
    Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env
@ -12939,7 +12983,7 @@ def get_image():
        return FileResponse(logo_path, media_type="image/jpeg")


-@app.get("/sso/callback", tags=["experimental"])
+@app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
 async def auth_callback(request: Request):
    """Verify login"""
    global general_settings, ui_access_mode, premium_user
@ -13244,6 +13288,7 @@ async def auth_callback(request: Request):
    tags=["Invite Links"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=InvitationModel,
+    include_in_schema=False,
 )
 async def new_invitation(
    data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13308,6 +13353,7 @@ async def new_invitation(
    tags=["Invite Links"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=InvitationModel,
+    include_in_schema=False,
 )
 async def invitation_info(
    invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13359,6 +13405,7 @@ async def invitation_info(
    tags=["Invite Links"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=InvitationModel,
+    include_in_schema=False,
 )
 async def invitation_update(
    data: InvitationUpdate,
@ -13419,6 +13466,7 @@ async def invitation_update(
    tags=["Invite Links"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=InvitationModel,
+    include_in_schema=False,
 )
 async def invitation_delete(
    data: InvitationDelete,
@ -13471,6 +13519,7 @@ async def invitation_delete(
    "/config/update",
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def update_config(config_info: ConfigYAML):
    """
@ -13628,6 +13677,7 @@ Keep it more precise, to prevent overwrite other values unintentially
    "/config/field/update",
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def update_config_general_settings(
    data: ConfigFieldUpdate,
@ -13706,6 +13756,7 @@ async def update_config_general_settings(
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=ConfigFieldInfo,
+    include_in_schema=False,
 )
 async def get_config_general_settings(
    field_name: str,
@ -13766,6 +13817,7 @@ async def get_config_general_settings(
    "/config/list",
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def get_config_list(
    config_type: Literal["general_settings"],
@ -13842,6 +13894,7 @@ async def get_config_list(
    "/config/field/delete",
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def delete_config_general_settings(
    data: ConfigFieldDelete,
@ -14097,6 +14150,7 @@ async def get_config():
    "/config/yaml",
    tags=["config.yaml"],
    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
 )
 async def config_yaml_endpoint(config_info: ConfigYAML):
    """
@ -14743,6 +14797,22 @@ async def cache_flushall():
        )


+@router.get(
+    "/get/litellm_model_cost_map",
+    include_in_schema=False,
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def get_litellm_model_cost_map():
+    try:
+        _model_cost_map = litellm.model_cost
+        return _model_cost_map
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Internal Server Error ({str(e)})",
+        )
+
+
@router.get("/", dependencies=[Depends(user_api_key_auth)])
 async def home(request: Request):
    return "LiteLLM: RUNNING"