Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Merge pull request #3575 from BerriAI/litellm_end_user_obj

fix(proxy_server.py): check + get end-user obj even for master key calls

Commit 566a574af3
2 changed files with 52 additions and 58 deletions
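The gist of the fix: previously, requests authenticated with the master key took a shortcut that skipped the end-user lookup entirely; now the request body is read first, the end-user object referenced by its `user` field is fetched, and its `allowed_model_region` is forwarded into the resulting `UserAPIKeyAuth` object even on the master-key path. Below is a minimal, self-contained sketch of that control flow; the `EndUser` dataclass, the in-memory lookup table, and the simplified `UserAPIKeyAuth` are illustrative stand-ins, not litellm's real types.

```python
from dataclasses import dataclass
from typing import Optional

# Illustrative stand-ins; not litellm's real types.
@dataclass
class EndUser:
    user_id: str
    allowed_model_region: Optional[str] = None

@dataclass
class UserAPIKeyAuth:
    api_key: str
    user_role: str = "proxy_admin"
    allowed_model_region: Optional[str] = None

END_USERS = {"end-user-1": EndUser("end-user-1", allowed_model_region="eu")}

def master_key_auth(api_key: str, master_key: str, request_data: dict) -> UserAPIKeyAuth:
    # Resolve the end-user object *before* the master-key shortcut, so
    # master-key calls also pick up per-end-user settings.
    end_user_params = {}
    end_user = END_USERS.get(request_data.get("user", ""))
    if end_user is not None and end_user.allowed_model_region is not None:
        end_user_params["allowed_model_region"] = end_user.allowed_model_region

    if api_key == master_key:
        # The end-user params are merged into the auth object via ** unpacking.
        return UserAPIKeyAuth(api_key=master_key, **end_user_params)
    raise PermissionError("invalid key")

print(master_key_auth("sk-1234", "sk-1234", {"user": "end-user-1"}))
# UserAPIKeyAuth(api_key='sk-1234', user_role='proxy_admin', allowed_model_region='eu')
```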
|
@@ -1,47 +1,18 @@
model_list:
  - litellm_params:
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: 2023-07-01-preview
      model: azure/azure-embedding-model
    model_info:
      base_model: text-embedding-ada-002
      mode: embedding
    model_name: text-embedding-ada-002
  - model_name: gpt-3.5-turbo-012
    litellm_params:
      model: gpt-3.5-turbo
      api_base: http://0.0.0.0:8080
      api_key: ""
  - model_name: gpt-3.5-turbo-0125-preview
    litellm_params:
      model: azure/chatgpt-v-2
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
  - model_name: bert-classifier
    litellm_params:
      model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
      api_key: os.environ/HUGGINGFACE_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/gpt-35-turbo
      api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
      api_key: os.environ/AZURE_EUROPE_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault

router_settings:
  redis_host: redis
  # redis_password: <your redis password>
  redis_port: 6379
  enable_pre_call_checks: true

litellm_settings:
  fallbacks: [{"gpt-3.5-turbo-012": ["azure-gpt-3.5-turbo"]}]
  callbacks: ["lago"]
  # service_callback: ["prometheus_system"]
  # success_callback: ["prometheus"]
  # failure_callback: ["prometheus"]

general_settings:
  enable_jwt_auth: True
  litellm_jwtauth:
    team_id_default: "1234"
    user_id_jwt_field:
    user_id_upsert: True
  disable_reset_budget: True
  proxy_batch_write_at: 10 # 👈 Frequency of batch writing logs to server (in seconds)
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
  alerting: ["slack"]

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
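For context, the `user` field that this PR's change reads is the standard OpenAI request parameter, so the end user is attached per request from the client side. A hedged usage example against a proxy started with a config like the one above; the port, master key, and end-user id are assumptions.

```python
import openai

# Point the OpenAI SDK at the litellm proxy; base_url and api_key are assumptions.
client = openai.OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # resolved via the model_list above
    messages=[{"role": "user", "content": "hello"}],
    user="end-user-1",  # the field the proxy's end-user lookup keys off
)
print(response.choices[0].message.content)
```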
@@ -601,16 +601,45 @@ async def user_api_key_auth(
    ):
        return valid_token

    ## Check END-USER OBJECT
    request_data = await _read_request_body(request=request)
    _end_user_object = None
    end_user_params = {}
    if "user" in request_data:
        _end_user_object = await get_end_user_object(
            end_user_id=request_data["user"],
            prisma_client=prisma_client,
            user_api_key_cache=user_api_key_cache,
        )
        if _end_user_object is not None:
            end_user_params["allowed_model_region"] = (
                _end_user_object.allowed_model_region
            )

    try:
        is_master_key_valid = secrets.compare_digest(api_key, master_key)
        is_master_key_valid = secrets.compare_digest(api_key, master_key)  # type: ignore
    except Exception as e:
        is_master_key_valid = False

    ## VALIDATE MASTER KEY ##
    try:
        assert isinstance(master_key, str)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail={
                "Master key must be a valid string. Current type={}".format(
                    type(master_key)
                )
            },
        )

    if is_master_key_valid:
        _user_api_key_obj = UserAPIKeyAuth(
            api_key=master_key,
            user_role="proxy_admin",
            user_id=litellm_proxy_admin_name,
            **end_user_params,
        )
        await user_api_key_cache.async_set_cache(
            key=hash_token(master_key), value=_user_api_key_obj
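Two details worth calling out in this hunk: the master-key comparison uses `secrets.compare_digest` (a constant-time comparison, wrapped in try/except so a missing or non-string master key simply fails the check), and `**end_user_params` only injects `allowed_model_region` into `UserAPIKeyAuth` when an end-user object was actually found. A small sketch of the comparison guard, assuming the Optional typing that presumably motivates the `# type: ignore`:

```python
import secrets
from typing import Optional

def is_master_key(api_key: str, master_key: Optional[str]) -> bool:
    # compare_digest raises TypeError if master_key is None, so mirror the
    # try/except above and treat any failure as "not the master key".
    try:
        return secrets.compare_digest(api_key, master_key)  # type: ignore
    except Exception:
        return False

assert is_master_key("sk-1234", "sk-1234") is True
assert is_master_key("sk-1234", None) is False
```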
@@ -675,10 +704,6 @@ async def user_api_key_auth(
    # 7. If token spend is under team budget
    # 8. If team spend is under team budget

    request_data = await _read_request_body(
        request=request
    )  # request data, used across all checks. Making this easily available

    # Check 1. If token can call model
    _model_alias_map = {}
    if (
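`_read_request_body` is the proxy's internal helper for parsing the request body; with the body now read once near the top of `user_api_key_auth` (previous hunk), this later read becomes redundant and is dropped. A simplified, assumed version of such a helper for a FastAPI/Starlette `Request` (not litellm's actual implementation):

```python
from fastapi import Request

async def read_request_body(request: Request) -> dict:
    """Best-effort JSON parse of the request body; returns {} if empty or not JSON."""
    try:
        return await request.json()
    except Exception:
        return {}
```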
@@ -917,7 +942,7 @@ async def user_api_key_auth(
                    {"startTime": {"gt": twenty_eight_days_ago}},
                    {"model": current_model},
                ]
            },
        },  # type: ignore
    )
    if (
        len(model_spend) > 0
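This hunk only adds a `# type: ignore` to the closing brace, but the filter itself is worth unpacking: spend rows are restricted to the current model within the last 28 days. A rough illustration of how that cutoff and a Prisma-style `where` clause could be built; the `AND` wrapper and the variable values are assumptions from context, not taken from the source.

```python
from datetime import datetime, timedelta, timezone

current_model = "gpt-3.5-turbo"  # assumed value
twenty_eight_days_ago = datetime.now(timezone.utc) - timedelta(days=28)

# Approximate shape of the filter used when aggregating spend logs for this model.
where_filter = {
    "AND": [
        {"startTime": {"gt": twenty_eight_days_ago}},
        {"model": current_model},
    ]
}
```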
@@ -989,14 +1014,6 @@ async def user_api_key_auth(
            key=valid_token.team_id, value=_team_obj
        )  # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py

        _end_user_object = None
        if "user" in request_data:
            _end_user_object = await get_end_user_object(
                end_user_id=request_data["user"],
                prisma_client=prisma_client,
                user_api_key_cache=user_api_key_cache,
            )

        global_proxy_spend = None
        if (
            litellm.max_budget > 0 and prisma_client is not None
@@ -2387,6 +2404,12 @@ class ProxyConfig:
        )
        if master_key and master_key.startswith("os.environ/"):
            master_key = litellm.get_secret(master_key)
            if not isinstance(master_key, str):
                raise Exception(
                    "Master key must be a string. Current type - {}".format(
                        type(master_key)
                    )
                )

        if master_key is not None and isinstance(master_key, str):
            litellm_master_key_hash = hash_token(master_key)
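The added block makes startup fail loudly when a `master_key: os.environ/...` reference does not resolve to a string (for example, the variable is unset or a secret manager returns a non-string value). A self-contained sketch of that resolution-plus-validation pattern; the env var name is hypothetical and `resolve_secret` is a simplified stand-in for `litellm.get_secret`:

```python
import os

def resolve_secret(value: str):
    """Resolve 'os.environ/VAR' style references, roughly as the config loader does."""
    if isinstance(value, str) and value.startswith("os.environ/"):
        return os.environ.get(value.removeprefix("os.environ/"))
    return value

os.environ["PROXY_MASTER_KEY"] = "sk-1234"  # hypothetical env var for the demo
master_key = resolve_secret("os.environ/PROXY_MASTER_KEY")
if not isinstance(master_key, str):
    raise Exception("Master key must be a string. Current type - {}".format(type(master_key)))
```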
@@ -3426,7 +3449,7 @@ async def startup_event():
    store_model_in_db = (
        litellm.get_secret("STORE_MODEL_IN_DB", store_model_in_db)
        or store_model_in_db
    )
    )  # type: ignore
    if store_model_in_db == True:
        scheduler.add_job(
            proxy_config.add_deployment,
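Here `store_model_in_db` is resolved from the `STORE_MODEL_IN_DB` secret/env flag, and when truthy the startup hook schedules `proxy_config.add_deployment` as a recurring job so DB-stored models are picked up without a restart. A hedged sketch of that pattern with APScheduler; the interval and the stand-in job body are assumptions, not what litellm uses.

```python
from apscheduler.schedulers.asyncio import AsyncIOScheduler

scheduler = AsyncIOScheduler()

async def add_deployment():
    # Stand-in for proxy_config.add_deployment: poll the DB for new models.
    print("checking DB for newly added deployments")

async def startup_event():
    store_model_in_db = True  # would come from get_secret("STORE_MODEL_IN_DB", default)
    if store_model_in_db == True:
        scheduler.add_job(add_deployment, "interval", seconds=30)  # interval assumed
        scheduler.start()
```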