Merge branch 'main' into litellm_redis_cache_usage

This commit is contained in:
Krish Dholakia 2024-06-13 22:07:21 -07:00 committed by GitHub
commit c373f104cc
131 changed files with 3117 additions and 476 deletions

View file

@ -879,6 +879,7 @@ async def user_api_key_auth(
## check for cache hit (In-Memory Cache)
original_api_key = api_key # (Patch: For DynamoDB Backwards Compatibility)
_user_role = None
if api_key.startswith("sk-"):
api_key = hash_token(token=api_key)
valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore
@ -1512,7 +1513,7 @@ async def user_api_key_auth(
):
return UserAPIKeyAuth(
api_key=api_key,
user_role="app_owner",
user_role=_user_role,
parent_otel_span=parent_otel_span,
**valid_token_dict,
)
@ -6649,7 +6650,7 @@ async def generate_key_fn(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response)
_updated_values = json.dumps(response, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
@ -6754,10 +6755,10 @@ async def update_key_fn(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json)
_updated_values = json.dumps(data_json, default=str)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
_before_value = json.dumps(_before_value, default=str)
asyncio.create_task(
create_audit_log_for_update(
@ -6853,7 +6854,7 @@ async def delete_key_fn(
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row)
_key_row = json.dumps(key_row, default=str)
asyncio.create_task(
create_audit_log_for_update(
@ -7057,6 +7058,7 @@ async def info_key_fn(
"/spend/keys",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def spend_key_fn():
"""
@ -7089,6 +7091,7 @@ async def spend_key_fn():
"/spend/users",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def spend_user_fn(
user_id: Optional[str] = fastapi.Query(
@ -7219,6 +7222,7 @@ async def view_spend_tags(
responses={
200: {"model": List[LiteLLM_SpendLogs]},
},
include_in_schema=False,
)
async def get_global_activity(
start_date: Optional[str] = fastapi.Query(
@ -7322,6 +7326,7 @@ async def get_global_activity(
responses={
200: {"model": List[LiteLLM_SpendLogs]},
},
include_in_schema=False,
)
async def get_global_activity_model(
start_date: Optional[str] = fastapi.Query(
@ -7468,6 +7473,7 @@ async def get_global_activity_model(
responses={
200: {"model": List[LiteLLM_SpendLogs]},
},
include_in_schema=False,
)
async def get_global_activity_exceptions_per_deployment(
model_group: str = fastapi.Query(
@ -7620,6 +7626,7 @@ async def get_global_activity_exceptions_per_deployment(
responses={
200: {"model": List[LiteLLM_SpendLogs]},
},
include_in_schema=False,
)
async def get_global_activity_exceptions(
model_group: str = fastapi.Query(
@ -7830,7 +7837,6 @@ async def get_global_spend_provider(
"/global/spend/report",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
responses={
200: {"model": List[LiteLLM_SpendLogs]},
},
@ -8530,6 +8536,7 @@ async def global_spend_reset():
"/global/spend/logs",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend_logs(
api_key: str = fastapi.Query(
@ -8575,6 +8582,7 @@ async def global_spend_logs(
"/global/spend",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend():
"""
@ -8601,6 +8609,7 @@ async def global_spend():
"/global/spend/keys",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend_keys(
limit: int = fastapi.Query(
@ -8628,6 +8637,7 @@ async def global_spend_keys(
"/global/spend/teams",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend_per_team():
"""
@ -8752,6 +8762,7 @@ async def global_view_all_end_users():
"/global/spend/end_users",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
"""
@ -8804,6 +8815,7 @@ LIMIT 100
"/global/spend/models",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_spend_models(
limit: int = fastapi.Query(
@ -8832,6 +8844,7 @@ async def global_spend_models(
"/global/predict/spend/logs",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def global_predict_spend_logs(request: Request):
from enterprise.utils import _forecast_daily_cost
@ -8863,7 +8876,7 @@ async def new_user(data: NewUserRequest):
- organization_id: Optional[str] - specify the org a user belongs to.
- user_email: Optional[str] - Specify a user email.
- send_invite_email: Optional[bool] - Specify if an invite email should be sent.
- user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/blob/main/litellm/proxy/_types.py#L20`
- max_budget: Optional[float] - Specify max budget for a given user.
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
@ -9947,16 +9960,18 @@ async def new_team(
"""
Allow users to create a new team. Apply user permissions to their team.
[ASK FOR HELP](https://github.com/BerriAI/litellm/issues)
👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
Parameters:
- team_alias: Optional[str] - User defined team alias
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
@ -9981,6 +9996,21 @@ async def new_team(
{"role": "user", "user_id": "user-2434"}]
}'
```
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "QA Prod Bot",
"max_budget": 0.000000001,
"budget_duration": "1d"
}'
```
"""
global prisma_client
@ -10110,7 +10140,8 @@ async def new_team(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values)
_updated_values = json.dumps(_updated_values, default=str)
asyncio.create_task(
create_audit_log_for_update(
@ -10174,6 +10205,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@management_endpoint_wrapper
async def update_team(
data: UpdateTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
@ -10192,6 +10224,7 @@ async def update_team(
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
@ -10209,6 +10242,20 @@ async def update_team(
"tpm_limit": 100
}'
```
Example - Update Team `max_budget`
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"max_budget": 10
}'
```
"""
global prisma_client
@ -10248,8 +10295,8 @@ async def update_team(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
_after_value: str = json.dumps(updated_kv)
_before_value = json.dumps(_before_value, default=str)
_after_value: str = json.dumps(updated_kv, default=str)
asyncio.create_task(
create_audit_log_for_update(
@ -11408,7 +11455,7 @@ async def model_info_v2(
for _model in all_models:
# provided model_info in config.yaml
model_info = _model.get("model_info", {})
if debug == True:
if debug is True:
_openai_client = "None"
if llm_router is not None:
_openai_client = (
@ -11433,7 +11480,7 @@ async def model_info_v2(
litellm_model = litellm_params.get("model", None)
try:
litellm_model_info = litellm.get_model_info(model=litellm_model)
except:
except Exception:
litellm_model_info = {}
# 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
if litellm_model_info == {}:
@ -11444,8 +11491,10 @@ async def model_info_v2(
if len(split_model) > 0:
litellm_model = split_model[-1]
try:
litellm_model_info = litellm.get_model_info(model=litellm_model)
except:
litellm_model_info = litellm.get_model_info(
model=litellm_model, custom_llm_provider=split_model[0]
)
except Exception:
litellm_model_info = {}
for k, v in litellm_model_info.items():
if k not in model_info:
@ -11956,7 +12005,9 @@ async def model_info_v1(
if len(split_model) > 0:
litellm_model = split_model[-1]
try:
litellm_model_info = litellm.get_model_info(model=litellm_model)
litellm_model_info = litellm.get_model_info(
model=litellm_model, custom_llm_provider=split_model[0]
)
except:
litellm_model_info = {}
for k, v in litellm_model_info.items():
@ -12223,6 +12274,7 @@ async def alerting_settings(
"/queue/chat/completions",
tags=["experimental"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def async_queue_request(
request: Request,
@ -12334,18 +12386,10 @@ async def async_queue_request(
)
@router.get(
    "/ollama_logs",
    tags=["experimental"],
    dependencies=[Depends(user_api_key_auth)],
)
async def retrieve_server_log(request: Request):
    # Serve the Ollama server log located under the current user's home
    # directory. `request` is accepted but not used by this handler.
    server_log_path = os.path.expanduser("~/.ollama/logs/server.log")
    return FileResponse(server_log_path)
#### LOGIN ENDPOINTS ####
@app.get("/sso/key/generate", tags=["experimental"])
@app.get("/sso/key/generate", tags=["experimental"], include_in_schema=False)
async def google_login(request: Request):
"""
Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env
@ -12939,7 +12983,7 @@ def get_image():
return FileResponse(logo_path, media_type="image/jpeg")
@app.get("/sso/callback", tags=["experimental"])
@app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
async def auth_callback(request: Request):
"""Verify login"""
global general_settings, ui_access_mode, premium_user
@ -13244,6 +13288,7 @@ async def auth_callback(request: Request):
tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel,
include_in_schema=False,
)
async def new_invitation(
data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13308,6 +13353,7 @@ async def new_invitation(
tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel,
include_in_schema=False,
)
async def invitation_info(
invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13359,6 +13405,7 @@ async def invitation_info(
tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel,
include_in_schema=False,
)
async def invitation_update(
data: InvitationUpdate,
@ -13419,6 +13466,7 @@ async def invitation_update(
tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel,
include_in_schema=False,
)
async def invitation_delete(
data: InvitationDelete,
@ -13471,6 +13519,7 @@ async def invitation_delete(
"/config/update",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def update_config(config_info: ConfigYAML):
"""
@ -13628,6 +13677,7 @@ Keep it more precise, to prevent overwrite other values unintentially
"/config/field/update",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def update_config_general_settings(
data: ConfigFieldUpdate,
@ -13706,6 +13756,7 @@ async def update_config_general_settings(
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
response_model=ConfigFieldInfo,
include_in_schema=False,
)
async def get_config_general_settings(
field_name: str,
@ -13766,6 +13817,7 @@ async def get_config_general_settings(
"/config/list",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def get_config_list(
config_type: Literal["general_settings"],
@ -13842,6 +13894,7 @@ async def get_config_list(
"/config/field/delete",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def delete_config_general_settings(
data: ConfigFieldDelete,
@ -14097,6 +14150,7 @@ async def get_config():
"/config/yaml",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def config_yaml_endpoint(config_info: ConfigYAML):
"""
@ -14743,6 +14797,22 @@ async def cache_flushall():
)
@router.get(
    "/get/litellm_model_cost_map",
    dependencies=[Depends(user_api_key_auth)],
    include_in_schema=False,
)
async def get_litellm_model_cost_map():
    """
    Return `litellm.model_cost` as the response body.

    Any exception raised while reading it is converted into an HTTP 500
    whose detail carries the stringified error.
    """
    try:
        return litellm.model_cost
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Internal Server Error ({str(e)})",
        )
@router.get("/", dependencies=[Depends(user_api_key_auth)])
async def home(request: Request):
    # Root route: replies with a fixed status string. The route declares the
    # `user_api_key_auth` dependency; `request` itself is not inspected here.
    status_message = "LiteLLM: RUNNING"
    return status_message