forked from phoenix/litellm-mirror

commit eaccbf26b7
Merge branch 'main' into litellm_organization_table

10 changed files with 146 additions and 75 deletions
@@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
     "metadata": {"user": "ishaan@berri.ai"},
     "team_id": "core-infra",
     "max_budget": 10,
+    "soft_budget": 5,
 }'
 ```
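The docs change above adds `soft_budget` to the example request. For reference, a sketch of the same call from Python (the host, master key, and Bearer auth header are illustrative placeholders):

    import requests

    resp = requests.post(
        "http://0.0.0.0:8000/key/generate",
        headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
        json={
            "metadata": {"user": "ishaan@berri.ai"},
            "team_id": "core-infra",
            "max_budget": 10,
            "soft_budget": 5,  # alerts when crossed; does not block requests
        },
    )
    key_data = resp.json()
    print(key_data["key"], key_data["soft_budget"])  # soft_budget is echoed back (see proxy_server.py below)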
@@ -93,6 +94,7 @@ Request Params:
 - `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
 - `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
 - `max_budget`: *Optional[float]* - Specify max budget for a given key.
+- `soft_budget`: *Optional[float]* - Specify soft limit budget for a given key. Get Alerts when key hits its soft budget
 - `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
 - `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
 - `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
@@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers
 budget_duration: Optional[str] = (
     None  # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
 )
+default_soft_budget: float = (
+    50.0  # by default all litellm proxy keys have a soft budget of 50.0
+)
 _openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
 _openai_completion_params = [
     "functions",
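A note on the new default: the proxy falls back to this module-level value when no per-key soft budget is passed (the `or` fallback appears in `generate_key_helper_fn` further down). A small sketch of that behavior; note that an explicit soft budget of `0` would also fall through to the default, a side effect of using `or`:

    import litellm

    def resolve_soft_budget(key_soft_budget=None):
        # Mirrors the fallback used when building the budget row below.
        return key_soft_budget or litellm.default_soft_budget

    assert resolve_soft_budget(None) == 50.0
    assert resolve_soft_budget(5.0) == 5.0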
@@ -151,6 +151,7 @@ class GenerateRequestBase(LiteLLMBase):
     rpm_limit: Optional[int] = None
     budget_duration: Optional[str] = None
     allowed_cache_controls: Optional[list] = []
+    soft_budget: Optional[float] = None


 class GenerateKeyRequest(GenerateRequestBase):
@@ -327,7 +328,7 @@ class TeamRequest(LiteLLMBase):


 class LiteLLM_BudgetTable(LiteLLMBase):
     """Represents user-controllable params for a LiteLLM_BudgetTable record"""

+    soft_budget: Optional[float] = None
     max_budget: Optional[float] = None
     max_parallel_requests: Optional[int] = None
     tpm_limit: Optional[int] = None
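Since every budget field on the pydantic model is optional, a record can carry a soft budget alone. A quick sketch of constructing the model (field names taken from this hunk; values illustrative):

    row = LiteLLM_BudgetTable(soft_budget=5.0)  # alert threshold only
    full_row = LiteLLM_BudgetTable(
        soft_budget=5.0,
        max_budget=10.0,  # hard cap, unlike the soft (alert-only) budget
        max_parallel_requests=10,
        tpm_limit=1000,
    )
    assert row.max_budget is None  # unset fields default to None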
@@ -366,7 +367,7 @@ class OrganizationRequest(LiteLLMBase):
 class BudgetRequest(LiteLLMBase):
     budgets: List[str]


 class KeyManagementSystem(enum.Enum):
     GOOGLE_KMS = "google_kms"
     AZURE_KEY_VAULT = "azure_key_vault"
@@ -585,6 +586,7 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     request_id: str
     api_key: str
     model: Optional[str] = ""
+    api_base: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
     total_tokens: Optional[int] = 0
@@ -791,6 +791,7 @@ async def user_api_key_auth(
         "/global/spend/keys",
         "/global/spend/models",
         "/global/predict/spend/logs",
+        "/health/services",
     ]
     # check if the current route startswith any of the allowed routes
     if (
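Per the comment in this hunk, the allow-list is a prefix match rather than an exact match. A minimal sketch of that kind of check (names are illustrative, not the proxy's own):

    allowed_routes = [
        "/global/spend/keys",
        "/global/spend/models",
        "/global/predict/spend/logs",
        "/health/services",
    ]

    def is_allowed(path: str) -> bool:
        # startswith: sub-paths of an allowed route pass too
        return any(path.startswith(route) for route in allowed_routes)

    assert is_allowed("/health/services")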
@@ -1814,6 +1815,9 @@ async def generate_key_helper_fn(
     spend: float,
     key_max_budget: Optional[float] = None,  # key_max_budget is used to Budget Per key
     key_budget_duration: Optional[str] = None,
+    key_soft_budget: Optional[
+        float
+    ] = None,  # key_soft_budget is used to Budget Per key
     max_budget: Optional[float] = None,  # max_budget is used to Budget Per user
     budget_duration: Optional[str] = None,  # max_budget is used to Budget Per user
     token: Optional[str] = None,
@@ -1873,6 +1877,19 @@ async def generate_key_helper_fn(
     rpm_limit = rpm_limit
     allowed_cache_controls = allowed_cache_controls

+    # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
+    if prisma_client is not None:
+        # create the Budget Row for the LiteLLM Verification Token
+        budget_row = LiteLLM_BudgetTable(
+            soft_budget=key_soft_budget or litellm.default_soft_budget,
+            model_max_budget=model_max_budget or {},
+            created_by=user_id,
+            updated_by=user_id,
+        )
+        new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True))
+        _budget = await prisma_client.db.litellm_budgettable.create(data={**new_budget})  # type: ignore
+        _budget_id = getattr(_budget, "id", None)
+
     try:
         # Create a new verification token (you may want to enhance this logic based on your needs)
         user_data = {
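Every generated key now gets its own LiteLLM_BudgetTable row, and the returned `id` is what later lands in the token's `budget_id` field (see the `user_data` hunk below and the `budget_id String?` column in the schema). Reading the row back would follow the same prisma pattern as the `create` above; a sketch, assuming the standard prisma-client-py `find_first` API and an illustrative `token_row`:

    budget = await prisma_client.db.litellm_budgettable.find_first(
        where={"budget_id": token_row.budget_id}  # token_row is hypothetical
    )
    if budget is not None and current_spend >= budget.soft_budget:
        # soft budget only alerts; it does not block the request
        ...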
@@ -1910,6 +1927,7 @@ async def generate_key_helper_fn(
             "allowed_cache_controls": allowed_cache_controls,
             "permissions": permissions_json,
             "model_max_budget": model_max_budget_json,
+            "budget_id": _budget_id,
         }
         if (
             general_settings.get("allow_user_auth", False) == True
@@ -1982,6 +2000,9 @@ async def generate_key_helper_fn(
     except Exception as e:
         traceback.print_exc()
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+    # Add budget related info in key_data - this ensures it's returned
+    key_data["soft_budget"] = key_soft_budget
     return key_data
@@ -2142,14 +2163,6 @@ async def async_data_generator(response, user_api_key_dict):
     except Exception as e:
         yield f"data: {str(e)}\n\n"

-    ### ALERTING ###
-    end_time = time.time()
-    asyncio.create_task(
-        proxy_logging_obj.response_taking_too_long(
-            start_time=start_time, end_time=end_time, type="slow_response"
-        )
-    )
-
     # Streaming is done, yield the [DONE] chunk
     done_message = "[DONE]"
     yield f"data: {done_message}\n\n"
@@ -2497,14 +2510,6 @@ async def completion(
             headers=custom_headers,
         )

-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id
         return response
     except Exception as e:
@@ -2703,14 +2708,6 @@ async def chat_completion(
             headers=custom_headers,
         )

-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id

         ### CALL HOOKS ### - modify outgoing data
@@ -2918,12 +2915,6 @@ async def embeddings(

         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )

         return response
     except Exception as e:
@@ -3069,12 +3060,6 @@ async def image_generation(

         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )

         return response
     except Exception as e:
@@ -3228,12 +3213,6 @@ async def moderations(

         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )

         return response
     except Exception as e:
@@ -3378,6 +3357,8 @@ async def generate_key_fn(
         # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
         if "max_budget" in data_json:
             data_json["key_max_budget"] = data_json.pop("max_budget", None)
+        if "soft_budget" in data_json:
+            data_json["key_soft_budget"] = data_json.pop("soft_budget", None)

         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
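`/key/generate` accepts the user-facing names (`max_budget`, `soft_budget`, `budget_duration`), but since `generate_key_helper_fn` also creates users, the key-scoped values travel under `key_*` parameter names. The rename is a `dict.pop` per field; sketched:

    data_json = {"soft_budget": 5.0, "max_budget": 10.0}  # illustrative body

    for name in ("max_budget", "soft_budget", "budget_duration"):
        if name in data_json:
            data_json[f"key_{name}"] = data_json.pop(name, None)

    assert data_json == {"key_max_budget": 10.0, "key_soft_budget": 5.0}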
@@ -6722,6 +6703,50 @@ async def test_endpoint(request: Request):
     return {"route": request.url.path}


+@router.get(
+    "/health/services",
+    tags=["health"],
+    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
+)
+async def health_services_endpoint(
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+    service: Literal["slack_budget_alerts"] = fastapi.Query(
+        description="Specify the service being hit."
+    ),
+):
+    """
+    Hidden endpoint.
+
+    Used by the UI to let user check if slack alerting is working as expected.
+    """
+    global general_settings, proxy_logging_obj
+
+    if service is None:
+        raise HTTPException(
+            status_code=400, detail={"error": "Service must be specified."}
+        )
+
+    if service not in ["slack_budget_alerts"]:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
+            },
+        )
+
+    test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` my-secret-project \n`Expected Day of Error`: 28th March \n`Current Spend`: 100 \n`Projected Spend at end of month`: 1000 \n
+    """
+
+    if "slack" in general_settings.get("alerting", []):
+        await proxy_logging_obj.alerting_handler(message=test_message, level="Low")
+    else:
+        raise HTTPException(
+            status_code=422,
+            detail={"error": "No slack connection setup. Unable to test this."},
+        )
+
+
 @router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
 async def health_endpoint(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
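The UI's "Test Alert" button (added to CreateKey.tsx below) is the intended caller of this hidden endpoint, but it can be exercised directly; a sketch with requests, assuming a local proxy and a valid admin key:

    import requests

    resp = requests.get(
        "http://0.0.0.0:8000/health/services",
        params={"service": "slack_budget_alerts"},
        headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    )
    # 200 -> test alert was sent to the configured Slack channel
    # 422 -> "No slack connection setup. Unable to test this."
    print(resp.status_code, resp.json())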
@@ -11,10 +11,11 @@ generator client {
 model LiteLLM_BudgetTable {
   budget_id             String @id @default(uuid())
   max_budget            Float?
+  soft_budget           Float?
   max_parallel_requests Int?
   tpm_limit             BigInt?
   rpm_limit             BigInt?
-  model_max_budget      Json   @default("{}")
+  model_max_budget      Json?
   budget_duration       String?
   budget_reset_at       DateTime?
   created_at            DateTime @default(now()) @map("created_at")
@@ -107,6 +108,7 @@ model LiteLLM_VerificationToken {
   allowed_cache_controls String[] @default([])
   model_spend            Json     @default("{}")
   model_max_budget       Json     @default("{}")
+  budget_id              String?
 }

 // store proxy config.yaml
@@ -127,6 +129,7 @@ model LiteLLM_SpendLogs {
   startTime DateTime // Assuming start_time is a DateTime field
   endTime   DateTime // Assuming end_time is a DateTime field
   model     String   @default("")
+  api_base  String   @default("")
   user      String   @default("")
   metadata  Json     @default("{}")
   cache_hit String   @default("")
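Storing `api_base` per request lets spend be attributed to individual deployments. A hedged sketch of the kind of rollup this enables, aggregating in Python over prisma-client-py's `find_many` (the wrapper and model access mirror the calls elsewhere in this diff; the `spend` column is assumed from the pydantic model above):

    from collections import defaultdict

    rows = await prisma_client.db.litellm_spendlogs.find_many()
    spend_by_base = defaultdict(float)
    for row in rows:
        spend_by_base[row.api_base] += row.spend or 0.0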
@@ -64,6 +64,7 @@ class ProxyLogging:
         litellm.callbacks.append(self.max_parallel_request_limiter)
         litellm.callbacks.append(self.max_budget_limiter)
         litellm.callbacks.append(self.cache_control_check)
+        litellm.callbacks.append(self.response_taking_too_long_callback)
         for callback in litellm.callbacks:
             if callback not in litellm.input_callback:
                 litellm.input_callback.append(callback)
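Appending the bound method to `litellm.callbacks` relies on litellm accepting plain callables as callbacks, invoked with the standard custom-callback signature — the same `(kwargs, completion_response, start_time, end_time)` shape the new `response_taking_too_long_callback` below declares. A minimal sketch of a custom callback registered the same way:

    import litellm

    async def latency_probe(kwargs, completion_response, start_time, end_time):
        # start_time/end_time arrive as datetimes, so subtraction
        # yields a timedelta (see the callback added below)
        print("took", (end_time - start_time).total_seconds(), "s")

    litellm.callbacks.append(latency_probe)  # sketch; mirrors the hunk above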
@@ -142,6 +143,30 @@ class ProxyLogging:
             raise e
         return data

+    async def response_taking_too_long_callback(
+        self,
+        kwargs,  # kwargs to completion
+        completion_response,  # response from completion
+        start_time,
+        end_time,  # start/end time
+    ):
+        if self.alerting is None:
+            return
+        time_difference = end_time - start_time
+        # Convert the timedelta to float (in seconds)
+        time_difference_float = time_difference.total_seconds()
+        litellm_params = kwargs.get("litellm_params", {})
+        api_base = litellm_params.get("api_base", "")
+        model = kwargs.get("model", "")
+        messages = kwargs.get("messages", "")
+        request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
+        slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+        if time_difference_float > self.alerting_threshold:
+            await self.alerting_handler(
+                message=slow_message + request_info,
+                level="Low",
+            )
+
     async def response_taking_too_long(
         self,
         start_time: Optional[float] = None,
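Unlike the removed `time.time()` float math (see the deletions above and below), this callback receives `datetime` objects, hence the `timedelta.total_seconds()` conversion. A self-contained check of the threshold logic, with an illustrative threshold value:

    from datetime import datetime, timedelta

    alerting_threshold = 300  # seconds; illustrative value
    start_time = datetime(2024, 2, 23, 12, 0, 0)
    end_time = start_time + timedelta(seconds=400)

    # Same conversion the new callback performs
    time_difference_float = (end_time - start_time).total_seconds()
    assert time_difference_float > alerting_threshold  # would fire a "Low" alert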
@@ -189,16 +214,6 @@ class ProxyLogging:
                     level="Medium",
                 )

-        elif (
-            type == "slow_response" and start_time is not None and end_time is not None
-        ):
-            slow_message = f"`Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
-            if end_time - start_time > self.alerting_threshold:
-                await self.alerting_handler(
-                    message=slow_message + request_info,
-                    level="Low",
-                )
-
     async def budget_alerts(
         self,
         type: Literal[
@@ -1585,6 +1600,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
         "completion_tokens": usage.get("completion_tokens", 0),
         "request_tags": metadata.get("tags", []),
         "end_user": kwargs.get("user", ""),
+        "api_base": litellm_params.get("api_base", ""),
     }

     verbose_proxy_logger.debug(f"SpendTable: created payload - payload: {payload}\n\n")
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.28.8"
+version = "1.28.9"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.28.8"
+version = "1.28.9"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -11,6 +11,7 @@ generator client {
 model LiteLLM_BudgetTable {
   budget_id             String @id @default(uuid())
   max_budget            Float?
+  soft_budget           Float?
   max_parallel_requests Int?
   tpm_limit             BigInt?
   rpm_limit             BigInt?
@@ -107,6 +108,7 @@ model LiteLLM_VerificationToken {
   allowed_cache_controls String[] @default([])
   model_spend            Json     @default("{}")
   model_max_budget       Json     @default("{}")
+  budget_id              String?
 }

 // store proxy config.yaml
@@ -127,6 +129,7 @@ model LiteLLM_SpendLogs {
   startTime DateTime // Assuming start_time is a DateTime field
   endTime   DateTime // Assuming end_time is a DateTime field
   model     String   @default("")
+  api_base  String   @default("")
   user      String   @default("")
   metadata  Json     @default("{}")
   cache_hit String   @default("")
@@ -2,7 +2,7 @@

 import React, { useState, useEffect, useRef } from "react";
 import { Button, TextInput, Grid, Col } from "@tremor/react";
-import { Card, Metric, Text } from "@tremor/react";
+import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
 import {
   Button as Button2,
   Modal,
@@ -38,6 +38,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
   const [form] = Form.useForm();
   const [isModalVisible, setIsModalVisible] = useState(false);
   const [apiKey, setApiKey] = useState(null);
+  const [softBudget, setSoftBudget] = useState(null);
   const handleOk = () => {
     setIsModalVisible(false);
     form.resetFields();
@@ -54,8 +55,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({
       message.info("Making API Call");
       setIsModalVisible(true);
       const response = await keyCreateCall(accessToken, userID, formValues);
+
+      console.log("key create Response:", response);
       setData((prevData) => (prevData ? [...prevData, response] : [response])); // Check if prevData is null
       setApiKey(response["key"]);
+      setSoftBudget(response["soft_budget"]);
       message.success("API Key Created");
       form.resetFields();
       localStorage.removeItem("userData" + userID);
@@ -108,6 +112,9 @@ const CreateKey: React.FC<CreateKeyProps> = ({
               ))}
             </Select>
           </Form.Item>
+          <Form.Item label="Soft Budget (USD) Monthly" name="soft_budget" initialValue={50.00}>
+            <InputNumber step={0.01} precision={2} defaultValue={50.00} width={200} />
+          </Form.Item>
           <Form.Item label="Max Budget (USD)" name="max_budget">
             <InputNumber step={0.01} precision={2} width={200} />
           </Form.Item>
@@ -154,28 +161,38 @@ const CreateKey: React.FC<CreateKeyProps> = ({
       </Modal>
       {apiKey && (
         <Modal
-          title="Save your key"
           visible={isModalVisible}
           onOk={handleOk}
           onCancel={handleCancel}
           footer={null}
         >
           <Grid numItems={1} className="gap-2 w-full">
-            <Col numColSpan={1}>
-              <p>
-                Please save this secret key somewhere safe and accessible. For
-                security reasons, <b>you will not be able to view it again</b>{" "}
-                through your LiteLLM account. If you lose this secret key, you
-                will need to generate a new one.
-              </p>
-            </Col>
-            <Col numColSpan={1}>
-              {apiKey != null ? (
-                <Text>API Key: {apiKey}</Text>
-              ) : (
-                <Text>Key being created, this might take 30s</Text>
-              )}
-            </Col>
+            <Card>
+              <Title>Save your Key</Title>
+              <Col numColSpan={1}>
+                <p>
+                  Please save this secret key somewhere safe and accessible. For
+                  security reasons, <b>you will not be able to view it again</b>{" "}
+                  through your LiteLLM account. If you lose this secret key, you
+                  will need to generate a new one.
+                </p>
+              </Col>
+              <Col numColSpan={1}>
+                {apiKey != null ? (
+                  <div>
+                    <Text>API Key: {apiKey}</Text>
+                    <Title className="mt-6">Budgets</Title>
+                    <Text>Soft Limit Budget: ${softBudget}</Text>
+                    <Button className="mt-3">
+                      Test Alert
+                    </Button>
+
+                  </div>
+                ) : (
+                  <Text>Key being created, this might take 30s</Text>
+                )}
+              </Col>
+            </Card>
           </Grid>
         </Modal>
       )}
@@ -105,7 +105,7 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({

   return (
     <div>
-      <Button size = "xs" onClick={showModal}>
+      <Button size = "xs" onClick={showModal} variant="secondary">
        View Spend Report
      </Button>
      <Modal