mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)

commit f7694bc193: Merge branch 'main' into litellm_tpm_rpm_rate_limits

13 changed files with 378 additions and 31 deletions
@@ -1,4 +1,4 @@
-# Key Management
+# Virtual Keys
 Track Spend, Set budgets and create virtual keys for the proxy

 Grant other's temporary access to your proxy, with keys that expire after a set duration.
@@ -12,7 +12,7 @@ Grant other's temporary access to your proxy, with keys that expire after a set

 :::

-## Quick Start
+## Setup

 Requirements:

@@ -58,16 +58,37 @@ litellm --config /path/to/config.yaml
 curl 'http://0.0.0.0:8000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
---data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
+--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}'
 ```

+## /key/generate
+
+### Request
+```shell
+curl 'http://0.0.0.0:8000/key/generate' \
+--header 'Authorization: Bearer <your-master-key>' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+  "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
+  "duration": "20m",
+  "metadata": {"user": "ishaan@berri.ai"},
+  "team_id": "core-infra"
+}'
+```
+
+Request Params:
+
 - `models`: *list or null (optional)* - Specify the models a token has access too. If null, then token has access to all models on server.

 - `duration`: *str or null (optional)* Specify the length of time the token is valid for. If null, default is set to 1 hour. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

 - `metadata`: *dict or null (optional)* Pass metadata for the created token. If null defaults to {}

-Expected response:
+- `team_id`: *str or null (optional)* Specify team_id for the associated key
+
+### Response

 ```python
 {
@@ -76,7 +97,7 @@ Expected response:
 }
 ```

-## Keys that don't expire
+### Keys that don't expire

 Just set duration to None.

@@ -87,7 +108,7 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
 --data '{"models": ["azure-models"], "aliases": {"mistral-7b": "gpt-3.5-turbo"}, "duration": null}'
 ```

-## Upgrade/Downgrade Models
+### Upgrade/Downgrade Models

 If a user is expected to use a given model (i.e. gpt3-5), and you want to:

@@ -137,7 +158,7 @@ curl -X POST "https://0.0.0.0:8000/key/generate" \
 - **How are routing between diff keys/api bases done?** litellm handles this by shuffling between different models in the model list with the same model_name. [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)


-## Grant Access to new model
+### Grant Access to new model

 Use model access groups to give users access to select models, and add new ones to it over time (e.g. mistral, llama-2, etc.)

@@ -165,6 +186,102 @@ curl --location 'http://localhost:8000/key/generate' \
 "max_budget": 0,}'
 ```

+
+## /key/info
+
+### Request
+```shell
+curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
+-H "Authorization: Bearer sk-1234"
+```
+
+Request Params:
+- key: str - The key you want the info for
+
+### Response
+
+`token` is the hashed key (The DB stores the hashed key for security)
+```json
+{
+  "key": "sk-02Wr4IAlN3NvPXvL5JVvDA",
+  "info": {
+    "token": "80321a12d03412c527f2bd9db5fabd746abead2e1d50b435a534432fbaca9ef5",
+    "spend": 0.0,
+    "expires": "2024-01-18T23:52:09.125000+00:00",
+    "models": ["azure-gpt-3.5", "azure-embedding-model"],
+    "aliases": {},
+    "config": {},
+    "user_id": "ishaan2@berri.ai",
+    "team_id": "None",
+    "max_parallel_requests": null,
+    "metadata": {}
+  }
+}
+```
+
+## /key/update
+
+### Request
+```shell
+curl 'http://0.0.0.0:8000/key/update' \
+--header 'Authorization: Bearer <your-master-key>' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+  "key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
+  "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
+  "metadata": {"user": "ishaan@berri.ai"},
+  "team_id": "core-infra"
+}'
+```
+
+Request Params:
+- key: str - The key that needs to be updated.
+
+- models: list or null (optional) - Specify the models a token has access to. If null, then the token has access to all models on the server.
+
+- metadata: dict or null (optional) - Pass metadata for the updated token. If null, defaults to an empty dictionary.
+
+- team_id: str or null (optional) - Specify the team_id for the associated key.
+
+### Response
+
+```json
+{
+  "key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
+  "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
+  "metadata": {
+    "user": "ishaan@berri.ai"
+  }
+}
+```
+
+## /key/delete
+
+### Request
+```shell
+curl 'http://0.0.0.0:8000/key/delete' \
+--header 'Authorization: Bearer <your-master-key>' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+  "keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
+}'
+```
+
+Request Params:
+- keys: List[str] - List of keys to delete
+
+### Response
+
+```json
+{
+  "deleted_keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
+}
+```
+
 ## Tracking Spend

 You can get spend for a key by using the `/key/info` endpoint.
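The documentation changes above introduce the `/key/generate`, `/key/info`, `/key/update`, and `/key/delete` endpoints. For illustration, here is a minimal sketch of the same key lifecycle driven from Python with `requests`; the base URL and master key mirror the curl examples and are assumptions about your deployment, not values fixed by this commit.

```python
# Minimal sketch of the documented key lifecycle, using the `requests` library.
# Assumes the proxy runs at http://0.0.0.0:8000 with master key sk-1234,
# matching the curl examples above; adjust both for a real deployment.
import requests

BASE = "http://0.0.0.0:8000"
HEADERS = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}

# 1. Generate a key scoped to a team, expiring in 20 minutes
key = requests.post(
    f"{BASE}/key/generate",
    headers=HEADERS,
    json={
        "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
        "duration": "20m",
        "metadata": {"user": "ishaan@berri.ai"},
        "team_id": "core-infra",
    },
).json()["key"]

# 2. Inspect it - the `token` in the response is the hashed key
info = requests.get(f"{BASE}/key/info", headers=HEADERS, params={"key": key}).json()
print(info["info"]["spend"], info["info"]["team_id"])

# 3. Update, then delete it
requests.post(f"{BASE}/key/update", headers=HEADERS, json={"key": key, "models": ["gpt-4"]})
requests.post(f"{BASE}/key/delete", headers=HEADERS, json={"keys": [key]})
```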
@@ -692,9 +692,9 @@ def completion(
                 or get_secret("AZURE_API_KEY")
             )

-            azure_ad_token = optional_params.pop("azure_ad_token", None) or get_secret(
-                "AZURE_AD_TOKEN"
-            )
+            azure_ad_token = optional_params.get("extra_body", {}).pop(
+                "azure_ad_token", None
+            ) or get_secret("AZURE_AD_TOKEN")

             headers = headers or litellm.headers

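The change above stops popping `azure_ad_token` from the top level of `optional_params` and instead pops it from `optional_params["extra_body"]`, still falling back to the `AZURE_AD_TOKEN` secret. A hedged sketch of what a caller might now look like, assuming an `extra_body` kwarg to `litellm.completion` is what ends up in the `optional_params` the new lookup reads; deployment name, endpoint, and token are illustrative:

```python
import litellm

# Hedged sketch: the Azure AD token now rides inside extra_body and is popped
# server-side, with the AZURE_AD_TOKEN env var as the fallback. All values
# below are illustrative assumptions, not values from this commit.
response = litellm.completion(
    model="azure/my-gpt-35-deployment",  # hypothetical deployment name
    messages=[{"role": "user", "content": "hello"}],
    api_base="https://my-endpoint.openai.azure.com",  # hypothetical endpoint
    api_version="2023-07-01-preview",
    extra_body={"azure_ad_token": "<azure-ad-token>"},
)
print(response.choices[0].message.content)
```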
@@ -1,8 +1,8 @@
-from pydantic import BaseModel, Extra, Field, root_validator
-import enum, sys
-from typing import Optional, List, Union, Dict, Literal
+from pydantic import BaseModel, Extra, Field, root_validator, Json
+import enum
+from typing import Optional, List, Union, Dict, Literal, Any
 from datetime import datetime
-import uuid, json
+import uuid, json, sys, os


 class LiteLLMBase(BaseModel):
@@ -129,6 +129,7 @@ class GenerateKeyRequest(LiteLLMBase):
     config: Optional[dict] = {}
     spend: Optional[float] = 0
     user_id: Optional[str] = None
+    team_id: Optional[str] = None
     max_parallel_requests: Optional[int] = None
     metadata: Optional[dict] = {}
     tpm_limit: int = sys.maxsize
@@ -202,6 +203,7 @@ class DynamoDBArgs(LiteLLMBase):
     user_table_name: str = "LiteLLM_UserTable"
     key_table_name: str = "LiteLLM_VerificationToken"
     config_table_name: str = "LiteLLM_Config"
+    spend_table_name: str = "LiteLLM_SpendLogs"


 class ConfigGeneralSettings(LiteLLMBase):
@@ -320,3 +322,20 @@ class LiteLLM_UserTable(LiteLLMBase):
         if values.get("models") is None:
             values.update({"models", []})
         return values
+
+
+class LiteLLM_SpendLogs(LiteLLMBase):
+    request_id: str
+    api_key: str
+    model: Optional[str] = ""
+    call_type: str
+    spend: Optional[float] = 0.0
+    startTime: Union[str, datetime, None]
+    endTime: Union[str, datetime, None]
+    user: Optional[str] = ""
+    modelParameters: Optional[Json] = {}
+    messages: Optional[Json] = []
+    response: Optional[Json] = {}
+    usage: Optional[Json] = {}
+    metadata: Optional[Json] = {}
+    cache_hit: Optional[str] = "False"
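`LiteLLM_SpendLogs` is the new pydantic model that spend-log rows are validated against. A small sketch of constructing one by hand follows; every value is made up. Note the `Json`-typed fields take JSON strings (pydantic parses them on validation), which is why the `get_logging_payload` helper added further down `json.dumps`'es dict payloads before insertion.

```python
# Sketch: building a LiteLLM_SpendLogs row by hand; all values are
# illustrative. Json-typed fields expect JSON strings.
from litellm.proxy._types import LiteLLM_SpendLogs

row = LiteLLM_SpendLogs(
    request_id="chatcmpl-123",          # hash key in DynamoDB / @unique in Prisma
    api_key="<sha256-of-virtual-key>",  # hashed, never the raw key
    call_type="litellm.completion",
    spend=0.0021,
    startTime="2024-01-18T23:52:09",
    endTime="2024-01-18T23:52:11",
    model="gpt-3.5-turbo",
    messages='[{"role": "user", "content": "hi"}]',
    usage='{"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21}',
)
print(row.dict())
```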
@@ -131,10 +131,27 @@ class DynamoDBWrapper(CustomDB):
             raise Exception(
                 f"Failed to create table - {self.database_arguments.config_table_name}.\nPlease create a new table called {self.database_arguments.config_table_name}\nAND set `hash_key` as 'param_name'"
             )

+        ## Spend
+        try:
+            verbose_proxy_logger.debug("DynamoDB Wrapper - Creating Spend Table")
+            error_occurred = False
+            table = client.table(self.database_arguments.spend_table_name)
+            if not await table.exists():
+                await table.create(
+                    self.throughput_type,
+                    KeySchema(hash_key=KeySpec("request_id", KeyType.string)),
+                )
+        except Exception as e:
+            error_occurred = True
+        if error_occurred == True:
+            raise Exception(
+                f"Failed to create table - {self.database_arguments.key_table_name}.\nPlease create a new table called {self.database_arguments.key_table_name}\nAND set `hash_key` as 'token'"
+            )
         verbose_proxy_logger.debug("DynamoDB Wrapper - Done connecting()")

     async def insert_data(
-        self, value: Any, table_name: Literal["user", "key", "config"]
+        self, value: Any, table_name: Literal["user", "key", "config", "spend"]
     ):
         from aiodynamo.client import Client
         from aiodynamo.credentials import Credentials, StaticCredentials
@@ -166,6 +183,8 @@ class DynamoDBWrapper(CustomDB):
             table = client.table(self.database_arguments.key_table_name)
         elif table_name == "config":
             table = client.table(self.database_arguments.config_table_name)
+        elif table_name == "spend":
+            table = client.table(self.database_arguments.spend_table_name)

         for k, v in value.items():
             if isinstance(v, datetime):
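On the DynamoDB side, the new spend table is created with `request_id` as its hash key. Below is a hedged sketch of writing one row to that table directly with aiodynamo; region, credentials, and values are assumptions, and inside the proxy `DynamoDBWrapper.insert_data(value, table_name="spend")` does this for you.

```python
# Hedged sketch: putting a spend-log item into the new table with aiodynamo.
# Region, credentials, and values are assumptions; the proxy normally goes
# through DynamoDBWrapper.insert_data(..., table_name="spend").
import asyncio
import httpx
from aiodynamo.client import Client
from aiodynamo.credentials import Credentials
from aiodynamo.http.httpx import HTTPX

async def main():
    async with httpx.AsyncClient() as h:
        client = Client(HTTPX(h), Credentials.auto(), "us-east-1")
        table = client.table("LiteLLM_SpendLogs")
        await table.put_item(
            {"request_id": "chatcmpl-123", "api_key": "<hashed-key>", "spend": 0.002}
        )

asyncio.run(main())
```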
@@ -61,8 +61,8 @@ litellm_settings:
   # setting callback class
   # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]

-# general_settings:
-#   master_key: sk-1234
+general_settings:
+  master_key: sk-1234
 #   database_type: "dynamo_db"
 #   database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
 #     "billing_mode": "PAY_PER_REQUEST",
@@ -72,6 +72,7 @@ from litellm.proxy.utils import (
     ProxyLogging,
     _cache_user_row,
     send_email,
+    get_logging_payload,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic
@@ -518,6 +519,7 @@ async def track_cost_callback(
     global prisma_client, custom_db_client
     try:
         # check if it has collected an entire stream response
+        verbose_proxy_logger.debug(f"Proxy: In track_cost_callback for {kwargs}")
         verbose_proxy_logger.debug(
             f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
         )
@@ -538,7 +540,13 @@ async def track_cost_callback(
                 prisma_client is not None or custom_db_client is not None
             ):
                 await update_database(
-                    token=user_api_key, response_cost=response_cost, user_id=user_id
+                    token=user_api_key,
+                    response_cost=response_cost,
+                    user_id=user_id,
+                    kwargs=kwargs,
+                    completion_response=completion_response,
+                    start_time=start_time,
+                    end_time=end_time,
                 )
         elif kwargs["stream"] == False:  # for non streaming responses
             response_cost = litellm.completion_cost(
@@ -554,13 +562,27 @@ async def track_cost_callback(
                 prisma_client is not None or custom_db_client is not None
             ):
                 await update_database(
-                    token=user_api_key, response_cost=response_cost, user_id=user_id
+                    token=user_api_key,
+                    response_cost=response_cost,
+                    user_id=user_id,
+                    kwargs=kwargs,
+                    completion_response=completion_response,
+                    start_time=start_time,
+                    end_time=end_time,
                 )
     except Exception as e:
         verbose_proxy_logger.debug(f"error in tracking cost callback - {str(e)}")


-async def update_database(token, response_cost, user_id=None):
+async def update_database(
+    token,
+    response_cost,
+    user_id=None,
+    kwargs=None,
+    completion_response=None,
+    start_time=None,
+    end_time=None,
+):
     try:
         verbose_proxy_logger.debug(
             f"Enters prisma db call, token: {token}; user_id: {user_id}"
@@ -630,9 +652,28 @@ async def update_database(token, response_cost, user_id=None):
                     key=token, value={"spend": new_spend}, table_name="key"
                 )

+        async def _insert_spend_log_to_db():
+            # Helper to generate payload to log
+            verbose_proxy_logger.debug("inserting spend log to db")
+            payload = get_logging_payload(
+                kwargs=kwargs,
+                response_obj=completion_response,
+                start_time=start_time,
+                end_time=end_time,
+            )
+
+            payload["spend"] = response_cost
+
+            if prisma_client is not None:
+                await prisma_client.insert_data(data=payload, table_name="spend")
+
+            elif custom_db_client is not None:
+                await custom_db_client.insert_data(payload, table_name="spend")
+
         tasks = []
         tasks.append(_update_user_db())
         tasks.append(_update_key_db())
+        tasks.append(_insert_spend_log_to_db())
         await asyncio.gather(*tasks)
     except Exception as e:
         verbose_proxy_logger.debug(
@@ -1037,6 +1078,7 @@ async def generate_key_helper_fn(
     max_budget: Optional[float] = None,
     token: Optional[str] = None,
     user_id: Optional[str] = None,
+    team_id: Optional[str] = None,
     user_email: Optional[str] = None,
     max_parallel_requests: Optional[int] = None,
     metadata: Optional[dict] = {},
@@ -1084,12 +1126,15 @@ async def generate_key_helper_fn(
     user_id = user_id or str(uuid.uuid4())
     tpm_limit = tpm_limit or sys.maxsize
     rpm_limit = rpm_limit or sys.maxsize
+    if type(team_id) is not str:
+        team_id = str(team_id)
     try:
         # Create a new verification token (you may want to enhance this logic based on your needs)
         user_data = {
             "max_budget": max_budget,
             "user_email": user_email,
             "user_id": user_id,
+            "team_id": team_id,
             "spend": spend,
             "models": models,
             "max_parallel_requests": max_parallel_requests,
@@ -1104,6 +1149,7 @@ async def generate_key_helper_fn(
             "config": config_json,
             "spend": spend,
             "user_id": user_id,
+            "team_id": team_id,
             "max_parallel_requests": max_parallel_requests,
             "metadata": metadata_json,
             "tpm_limit": tpm_limit,
@@ -2051,6 +2097,7 @@ async def generate_key_fn(

     Parameters:
     - duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). **(Default is set to 1 hour.)**
+    - team_id: Optional[str] - The team id of the user
     - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
     - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
     - config: Optional[dict] - any key-specific configs, overrides config in config.yaml
@@ -9,6 +9,7 @@ generator client {

 model LiteLLM_UserTable {
   user_id String @unique
+  team_id String?
   max_budget Float?
   spend Float @default(0.0)
   user_email String?
@@ -27,6 +28,7 @@ model LiteLLM_VerificationToken {
   aliases Json @default("{}")
   config Json @default("{}")
   user_id String?
+  team_id String?
   max_parallel_requests Int?
   metadata Json @default("{}")
   tpm_limit BigInt?
@@ -37,3 +39,20 @@ model LiteLLM_Config {
   param_name String @id
   param_value Json?
 }
+
+model LiteLLM_SpendLogs {
+  request_id String @unique
+  call_type String
+  api_key String @default ("")
+  spend Float @default(0.0)
+  startTime DateTime // Assuming start_time is a DateTime field
+  endTime DateTime // Assuming end_time is a DateTime field
+  model String @default("")
+  user String @default("")
+  modelParameters Json @default("{}") // Assuming optional_params is a JSON field
+  messages Json @default("[]")
+  response Json @default("{}")
+  usage Json @default("{}")
+  metadata Json @default("{}")
+  cache_hit String @default("")
+}
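With `LiteLLM_SpendLogs` in the schema, per-key spend becomes directly queryable. A hedged sketch with prisma-client-py follows; the generated `litellm_spendlogs` accessor is the same one the `PrismaClient.insert_data` change below upserts through, and the hashed-key value is an assumption.

```python
# Hedged sketch: summing spend for one hashed key via prisma-client-py.
# The litellm_spendlogs accessor mirrors the upsert in PrismaClient below.
import asyncio
from prisma import Prisma

async def spend_for_key(hashed_key: str) -> float:
    db = Prisma()
    await db.connect()
    rows = await db.litellm_spendlogs.find_many(where={"api_key": hashed_key})
    await db.disconnect()
    return sum(row.spend for row in rows)

print(asyncio.run(spend_for_key("<sha256-of-virtual-key>")))
```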
@@ -1,7 +1,12 @@
 from typing import Optional, List, Any, Literal, Union
 import os, subprocess, hashlib, importlib, asyncio, copy, json, aiohttp, httpx
 import litellm, backoff
-from litellm.proxy._types import UserAPIKeyAuth, DynamoDBArgs, LiteLLM_VerificationToken
+from litellm.proxy._types import (
+    UserAPIKeyAuth,
+    DynamoDBArgs,
+    LiteLLM_VerificationToken,
+    LiteLLM_SpendLogs,
+)
 from litellm.caching import DualCache
 from litellm.proxy.hooks.parallel_request_limiter import MaxParallelRequestsHandler
 from litellm.proxy.hooks.max_budget_limiter import MaxBudgetLimiter
@@ -316,7 +321,7 @@ class PrismaClient:
         self,
         key: str,
         value: Any,
-        table_name: Literal["users", "keys", "config"],
+        table_name: Literal["users", "keys", "config", "spend"],
     ):
         """
         Generic implementation of get data
@@ -334,6 +339,10 @@ class PrismaClient:
                 response = await self.db.litellm_config.find_first(  # type: ignore
                     where={key: value}  # type: ignore
                 )
+            elif table_name == "spend":
+                response = await self.db.l.find_first(  # type: ignore
+                    where={key: value}  # type: ignore
+                )
             return response
         except Exception as e:
             asyncio.create_task(
@@ -417,7 +426,7 @@ class PrismaClient:
         on_backoff=on_backoff,  # specifying the function to call on backoff
     )
     async def insert_data(
-        self, data: dict, table_name: Literal["user", "key", "config"]
+        self, data: dict, table_name: Literal["user", "key", "config", "spend"]
     ):
         """
         Add a key to the database. If it already exists, do nothing.
@@ -473,8 +482,18 @@ class PrismaClient:
                 )

                 tasks.append(updated_table_row)

                 await asyncio.gather(*tasks)
+            elif table_name == "spend":
+                db_data = self.jsonify_object(data=data)
+                new_spend_row = await self.db.litellm_spendlogs.upsert(
+                    where={"request_id": data["request_id"]},
+                    data={
+                        "create": {**db_data},  # type: ignore
+                        "update": {},  # don't do anything if it already exists
+                    },
+                )
+                return new_spend_row
+
         except Exception as e:
             print_verbose(f"LiteLLM Prisma Client Exception: {e}")
             asyncio.create_task(
@@ -760,3 +779,85 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):

     except Exception as e:
         print_verbose("An error occurred while sending the email:", str(e))
+
+
+def hash_token(token: str):
+    import hashlib
+
+    # Hash the string using SHA-256
+    hashed_token = hashlib.sha256(token.encode()).hexdigest()
+
+    return hashed_token
+
+
+def get_logging_payload(kwargs, response_obj, start_time, end_time):
+    from litellm.proxy._types import LiteLLM_SpendLogs
+    from pydantic import Json
+    import uuid
+
+    if kwargs == None:
+        kwargs = {}
+    # standardize this function to be used across, s3, dynamoDB, langfuse logging
+    litellm_params = kwargs.get("litellm_params", {})
+    metadata = (
+        litellm_params.get("metadata", {}) or {}
+    )  # if litellm_params['metadata'] == None
+    messages = kwargs.get("messages")
+    optional_params = kwargs.get("optional_params", {})
+    call_type = kwargs.get("call_type", "litellm.completion")
+    cache_hit = kwargs.get("cache_hit", False)
+    usage = response_obj["usage"]
+    id = response_obj.get("id", str(uuid.uuid4()))
+    api_key = metadata.get("user_api_key", "")
+    if api_key is not None and type(api_key) == str:
+        # hash the api_key
+        api_key = hash_token(api_key)
+
+    payload = {
+        "request_id": id,
+        "call_type": call_type,
+        "api_key": api_key,
+        "cache_hit": cache_hit,
+        "startTime": start_time,
+        "endTime": end_time,
+        "model": kwargs.get("model", ""),
+        "user": kwargs.get("user", ""),
+        "modelParameters": optional_params,
+        "messages": messages,
+        "response": response_obj,
+        "usage": usage,
+        "metadata": metadata,
+    }
+
+    json_fields = [
+        field
+        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
+        if field_type == Json or field_type == Optional[Json]
+    ]
+    str_fields = [
+        field
+        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
+        if field_type == str or field_type == Optional[str]
+    ]
+    datetime_fields = [
+        field
+        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
+        if field_type == datetime
+    ]
+
+    for param in json_fields:
+        if param in payload and type(payload[param]) != Json:
+            if type(payload[param]) == litellm.ModelResponse:
+                payload[param] = payload[param].model_dump_json()
+            if type(payload[param]) == litellm.EmbeddingResponse:
+                payload[param] = payload[param].model_dump_json()
+            elif type(payload[param]) == litellm.Usage:
+                payload[param] = payload[param].model_dump_json()
+            else:
+                payload[param] = json.dumps(payload[param])
+
+    for param in str_fields:
+        if param in payload and type(payload[param]) != str:
+            payload[param] = str(payload[param])
+
+    return payload
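The `token` value that `/key/info` reports (see the docs diff above) is exactly what `hash_token` produces: the SHA-256 hex digest of the raw key. A self-contained check, reusing the example key from the docs:

```python
# Self-contained check: the DB stores sha256(raw_key) as `token`,
# matching the hash_token helper added above.
import hashlib

def hash_token(token: str) -> str:
    return hashlib.sha256(token.encode()).hexdigest()

print(hash_token("sk-02Wr4IAlN3NvPXvL5JVvDA"))
```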
@@ -268,7 +268,7 @@ def test_completion_azure_gpt4_vision():
         pytest.fail(f"Error occurred: {e}")


-test_completion_azure_gpt4_vision()
+# test_completion_azure_gpt4_vision()


 @pytest.mark.skip(reason="this test is flaky")
@@ -990,9 +990,9 @@ def test_azure_openai_ad_token():
         print("azure ad token respoonse\n")
         print(response)
         litellm.input_callback = []
-    except:
+    except Exception as e:
         litellm.input_callback = []
-        pass
+        pytest.fail(f"An exception occurs - {str(e)}")


 # test_azure_openai_ad_token()
@@ -179,6 +179,10 @@ def test_call_with_key_over_budget(custom_db_client):
     # 5. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    from litellm._logging import verbose_proxy_logger
+    import logging
+
+    verbose_proxy_logger.setLevel(logging.DEBUG)
     try:

         async def test():
@@ -543,7 +543,8 @@ def test_generate_and_update_key(prisma_client):
     async def test():
         await litellm.proxy.proxy_server.prisma_client.connect()
         request = NewUserRequest(
-            metadata={"team": "litellm-team3", "project": "litellm-project3"}
+            metadata={"team": "litellm-team3", "project": "litellm-project3"},
+            team_id="litellm-core-infra@gmail.com",
         )
         key = await new_user(request)
         print(key)
@@ -560,6 +561,7 @@ def test_generate_and_update_key(prisma_client):
             "team": "litellm-team3",
             "project": "litellm-project3",
         }
+        assert result["info"].team_id == "litellm-core-infra@gmail.com"

         request = Request(scope={"type": "http"})
         request._url = URL(url="/update/key")
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.18.2"
+version = "1.18.3"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -61,7 +61,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.18.2"
+version = "1.18.3"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -9,6 +9,7 @@ generator client {

 model LiteLLM_UserTable {
   user_id String @unique
+  team_id String?
   max_budget Float?
   spend Float @default(0.0)
   user_email String?
@@ -27,6 +28,7 @@ model LiteLLM_VerificationToken {
   aliases Json @default("{}")
   config Json @default("{}")
   user_id String?
+  team_id String?
   max_parallel_requests Int?
   metadata Json @default("{}")
   tpm_limit BigInt?
@@ -37,3 +39,20 @@ model LiteLLM_Config {
   param_name String @id
   param_value Json?
 }
+
+model LiteLLM_SpendLogs {
+  request_id String @unique
+  api_key String @default ("")
+  call_type String
+  spend Float @default(0.0)
+  startTime DateTime // Assuming start_time is a DateTime field
+  endTime DateTime // Assuming end_time is a DateTime field
+  model String @default("")
+  user String @default("")
+  modelParameters Json @default("{}") // Assuming optional_params is a JSON field
+  messages Json @default("[]")
+  response Json @default("{}")
+  usage Json @default("{}")
+  metadata Json @default("{}")
+  cache_hit String @default("")
+}