mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Merge branch 'main' into litellm_redis_cache_usage
This commit is contained in:
commit
e61cd2e1e2
131 changed files with 3117 additions and 476 deletions
|
@ -879,6 +879,7 @@ async def user_api_key_auth(
|
|||
|
||||
## check for cache hit (In-Memory Cache)
|
||||
original_api_key = api_key # (Patch: For DynamoDB Backwards Compatibility)
|
||||
_user_role = None
|
||||
if api_key.startswith("sk-"):
|
||||
api_key = hash_token(token=api_key)
|
||||
valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore
|
||||
|
@ -1512,7 +1513,7 @@ async def user_api_key_auth(
|
|||
):
|
||||
return UserAPIKeyAuth(
|
||||
api_key=api_key,
|
||||
user_role="app_owner",
|
||||
user_role=_user_role,
|
||||
parent_otel_span=parent_otel_span,
|
||||
**valid_token_dict,
|
||||
)
|
||||
|
@ -6649,7 +6650,7 @@ async def generate_key_fn(
|
|||
|
||||
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
|
||||
if litellm.store_audit_logs is True:
|
||||
_updated_values = json.dumps(response)
|
||||
_updated_values = json.dumps(response, default=str)
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
request_data=LiteLLM_AuditLogs(
|
||||
|
@ -6754,10 +6755,10 @@ async def update_key_fn(
|
|||
|
||||
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
|
||||
if litellm.store_audit_logs is True:
|
||||
_updated_values = json.dumps(data_json)
|
||||
_updated_values = json.dumps(data_json, default=str)
|
||||
|
||||
_before_value = existing_key_row.json(exclude_none=True)
|
||||
_before_value = json.dumps(_before_value)
|
||||
_before_value = json.dumps(_before_value, default=str)
|
||||
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
|
@ -6853,7 +6854,7 @@ async def delete_key_fn(
|
|||
)
|
||||
|
||||
key_row = key_row.json(exclude_none=True)
|
||||
_key_row = json.dumps(key_row)
|
||||
_key_row = json.dumps(key_row, default=str)
|
||||
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
|
@ -7057,6 +7058,7 @@ async def info_key_fn(
|
|||
"/spend/keys",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def spend_key_fn():
|
||||
"""
|
||||
|
@ -7089,6 +7091,7 @@ async def spend_key_fn():
|
|||
"/spend/users",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def spend_user_fn(
|
||||
user_id: Optional[str] = fastapi.Query(
|
||||
|
@ -7219,6 +7222,7 @@ async def view_spend_tags(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity(
|
||||
start_date: Optional[str] = fastapi.Query(
|
||||
|
@ -7322,6 +7326,7 @@ async def get_global_activity(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_model(
|
||||
start_date: Optional[str] = fastapi.Query(
|
||||
|
@ -7468,6 +7473,7 @@ async def get_global_activity_model(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_exceptions_per_deployment(
|
||||
model_group: str = fastapi.Query(
|
||||
|
@ -7620,6 +7626,7 @@ async def get_global_activity_exceptions_per_deployment(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_exceptions(
|
||||
model_group: str = fastapi.Query(
|
||||
|
@ -7830,7 +7837,6 @@ async def get_global_spend_provider(
|
|||
"/global/spend/report",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
|
@ -8530,6 +8536,7 @@ async def global_spend_reset():
|
|||
"/global/spend/logs",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_logs(
|
||||
api_key: str = fastapi.Query(
|
||||
|
@ -8575,6 +8582,7 @@ async def global_spend_logs(
|
|||
"/global/spend",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend():
|
||||
"""
|
||||
|
@ -8601,6 +8609,7 @@ async def global_spend():
|
|||
"/global/spend/keys",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_keys(
|
||||
limit: int = fastapi.Query(
|
||||
|
@ -8628,6 +8637,7 @@ async def global_spend_keys(
|
|||
"/global/spend/teams",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_per_team():
|
||||
"""
|
||||
|
@ -8752,6 +8762,7 @@ async def global_view_all_end_users():
|
|||
"/global/spend/end_users",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
|
||||
"""
|
||||
|
@ -8804,6 +8815,7 @@ LIMIT 100
|
|||
"/global/spend/models",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_models(
|
||||
limit: int = fastapi.Query(
|
||||
|
@ -8832,6 +8844,7 @@ async def global_spend_models(
|
|||
"/global/predict/spend/logs",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_predict_spend_logs(request: Request):
|
||||
from enterprise.utils import _forecast_daily_cost
|
||||
|
@ -8863,7 +8876,7 @@ async def new_user(data: NewUserRequest):
|
|||
- organization_id: Optional[str] - specify the org a user belongs to.
|
||||
- user_email: Optional[str] - Specify a user email.
|
||||
- send_invite_email: Optional[bool] - Specify if an invite email should be sent.
|
||||
- user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
|
||||
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/blob/main/litellm/proxy/_types.py#L20`
|
||||
- max_budget: Optional[float] - Specify max budget for a given user.
|
||||
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
|
||||
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
|
||||
|
@ -9947,16 +9960,18 @@ async def new_team(
|
|||
"""
|
||||
Allow users to create a new team. Apply user permissions to their team.
|
||||
|
||||
[ASK FOR HELP](https://github.com/BerriAI/litellm/issues)
|
||||
👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
|
||||
|
||||
Parameters:
|
||||
- team_alias: Optional[str] - User defined team alias
|
||||
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
|
||||
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
|
||||
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
|
||||
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
|
||||
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
|
||||
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
|
||||
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most these models. If empty, assumes all models are allowed.
|
||||
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
|
||||
|
||||
|
@ -9981,6 +9996,21 @@ async def new_team(
|
|||
{"role": "user", "user_id": "user-2434"}]
|
||||
}'
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
curl --location 'http://0.0.0.0:4000/team/new' \
|
||||
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
|
||||
--header 'Content-Type: application/json' \
|
||||
|
||||
--data '{
|
||||
"team_alias": "QA Prod Bot",
|
||||
"max_budget": 0.000000001,
|
||||
"budget_duration": "1d"
|
||||
}'
|
||||
|
||||
```
|
||||
"""
|
||||
global prisma_client
|
||||
|
@ -10110,7 +10140,8 @@ async def new_team(
|
|||
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
|
||||
if litellm.store_audit_logs is True:
|
||||
_updated_values = complete_team_data.json(exclude_none=True)
|
||||
_updated_values = json.dumps(_updated_values)
|
||||
|
||||
_updated_values = json.dumps(_updated_values, default=str)
|
||||
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
|
@ -10174,6 +10205,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
|
|||
@management_endpoint_wrapper
|
||||
async def update_team(
|
||||
data: UpdateTeamRequest,
|
||||
http_request: Request,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
None,
|
||||
|
@ -10192,6 +10224,7 @@ async def update_team(
|
|||
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
|
||||
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
|
||||
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
|
||||
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most these models. If empty, assumes all models are allowed.
|
||||
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
|
||||
|
||||
|
@ -10209,6 +10242,20 @@ async def update_team(
|
|||
"tpm_limit": 100
|
||||
}'
|
||||
```
|
||||
|
||||
Example - Update Team `max_budget` budget
|
||||
```
|
||||
curl --location 'http://0.0.0.0:8000/team/update' \
|
||||
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
|
||||
--header 'Content-Type: application/json' \
|
||||
|
||||
--data-raw '{
|
||||
"team_id": "litellm-test-client-id-new",
|
||||
"max_budget": 10
|
||||
}'
|
||||
```
|
||||
"""
|
||||
global prisma_client
|
||||
|
||||
|
@ -10248,8 +10295,8 @@ async def update_team(
|
|||
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
|
||||
if litellm.store_audit_logs is True:
|
||||
_before_value = existing_team_row.json(exclude_none=True)
|
||||
_before_value = json.dumps(_before_value)
|
||||
_after_value: str = json.dumps(updated_kv)
|
||||
_before_value = json.dumps(_before_value, default=str)
|
||||
_after_value: str = json.dumps(updated_kv, default=str)
|
||||
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
|
@ -11408,7 +11455,7 @@ async def model_info_v2(
|
|||
for _model in all_models:
|
||||
# provided model_info in config.yaml
|
||||
model_info = _model.get("model_info", {})
|
||||
if debug == True:
|
||||
if debug is True:
|
||||
_openai_client = "None"
|
||||
if llm_router is not None:
|
||||
_openai_client = (
|
||||
|
@ -11433,7 +11480,7 @@ async def model_info_v2(
|
|||
litellm_model = litellm_params.get("model", None)
|
||||
try:
|
||||
litellm_model_info = litellm.get_model_info(model=litellm_model)
|
||||
except:
|
||||
except Exception:
|
||||
litellm_model_info = {}
|
||||
# 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
|
||||
if litellm_model_info == {}:
|
||||
|
@ -11444,8 +11491,10 @@ async def model_info_v2(
|
|||
if len(split_model) > 0:
|
||||
litellm_model = split_model[-1]
|
||||
try:
|
||||
litellm_model_info = litellm.get_model_info(model=litellm_model)
|
||||
except:
|
||||
litellm_model_info = litellm.get_model_info(
|
||||
model=litellm_model, custom_llm_provider=split_model[0]
|
||||
)
|
||||
except Exception:
|
||||
litellm_model_info = {}
|
||||
for k, v in litellm_model_info.items():
|
||||
if k not in model_info:
|
||||
|
@ -11956,7 +12005,9 @@ async def model_info_v1(
|
|||
if len(split_model) > 0:
|
||||
litellm_model = split_model[-1]
|
||||
try:
|
||||
litellm_model_info = litellm.get_model_info(model=litellm_model)
|
||||
litellm_model_info = litellm.get_model_info(
|
||||
model=litellm_model, custom_llm_provider=split_model[0]
|
||||
)
|
||||
except:
|
||||
litellm_model_info = {}
|
||||
for k, v in litellm_model_info.items():
|
||||
|
@ -12223,6 +12274,7 @@ async def alerting_settings(
|
|||
"/queue/chat/completions",
|
||||
tags=["experimental"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def async_queue_request(
|
||||
request: Request,
|
||||
|
@ -12334,18 +12386,10 @@ async def async_queue_request(
|
|||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/ollama_logs", dependencies=[Depends(user_api_key_auth)], tags=["experimental"]
|
||||
)
|
||||
async def retrieve_server_log(request: Request):
|
||||
filepath = os.path.expanduser("~/.ollama/logs/server.log")
|
||||
return FileResponse(filepath)
|
||||
|
||||
|
||||
#### LOGIN ENDPOINTS ####
|
||||
|
||||
|
||||
@app.get("/sso/key/generate", tags=["experimental"])
|
||||
@app.get("/sso/key/generate", tags=["experimental"], include_in_schema=False)
|
||||
async def google_login(request: Request):
|
||||
"""
|
||||
Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env
|
||||
|
@ -12939,7 +12983,7 @@ def get_image():
|
|||
return FileResponse(logo_path, media_type="image/jpeg")
|
||||
|
||||
|
||||
@app.get("/sso/callback", tags=["experimental"])
|
||||
@app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
|
||||
async def auth_callback(request: Request):
|
||||
"""Verify login"""
|
||||
global general_settings, ui_access_mode, premium_user
|
||||
|
@ -13244,6 +13288,7 @@ async def auth_callback(request: Request):
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def new_invitation(
|
||||
data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
|
||||
|
@ -13308,6 +13353,7 @@ async def new_invitation(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_info(
|
||||
invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
|
||||
|
@ -13359,6 +13405,7 @@ async def invitation_info(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_update(
|
||||
data: InvitationUpdate,
|
||||
|
@ -13419,6 +13466,7 @@ async def invitation_update(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_delete(
|
||||
data: InvitationDelete,
|
||||
|
@ -13471,6 +13519,7 @@ async def invitation_delete(
|
|||
"/config/update",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def update_config(config_info: ConfigYAML):
|
||||
"""
|
||||
|
@ -13628,6 +13677,7 @@ Keep it more precise, to prevent overwriting other values unintentionally
|
|||
"/config/field/update",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def update_config_general_settings(
|
||||
data: ConfigFieldUpdate,
|
||||
|
@ -13706,6 +13756,7 @@ async def update_config_general_settings(
|
|||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=ConfigFieldInfo,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_config_general_settings(
|
||||
field_name: str,
|
||||
|
@ -13766,6 +13817,7 @@ async def get_config_general_settings(
|
|||
"/config/list",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_config_list(
|
||||
config_type: Literal["general_settings"],
|
||||
|
@ -13842,6 +13894,7 @@ async def get_config_list(
|
|||
"/config/field/delete",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def delete_config_general_settings(
|
||||
data: ConfigFieldDelete,
|
||||
|
@ -14097,6 +14150,7 @@ async def get_config():
|
|||
"/config/yaml",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def config_yaml_endpoint(config_info: ConfigYAML):
|
||||
"""
|
||||
|
@ -14743,6 +14797,22 @@ async def cache_flushall():
|
|||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/get/litellm_model_cost_map",
|
||||
include_in_schema=False,
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def get_litellm_model_cost_map():
|
||||
try:
|
||||
_model_cost_map = litellm.model_cost
|
||||
return _model_cost_map
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Internal Server Error ({str(e)})",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/", dependencies=[Depends(user_api_key_auth)])
|
||||
async def home(request: Request):
|
||||
return "LiteLLM: RUNNING"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue