Merge branch 'main' into litellm_tpm_rpm_rate_limits

This commit is contained in:
Krrish Dholakia 2024-01-18 19:10:07 -08:00
commit f7694bc193
13 changed files with 378 additions and 31 deletions

View file

@@ -1,4 +1,4 @@
-# Key Management
+# Virtual Keys
Track Spend, Set budgets and create virtual keys for the proxy
Grant others temporary access to your proxy, with keys that expire after a set duration.
@@ -12,7 +12,7 @@ Grant others temporary access to your proxy, with keys that expire after a set
:::
-## Quick Start
+## Setup
Requirements:
@@ -58,16 +58,37 @@ litellm --config /path/to/config.yaml
curl 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
---data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
+--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}'
```
## /key/generate
### Request
```shell
curl 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
"duration": "20m",
"metadata": {"user": "ishaan@berri.ai"},
"team_id": "core-infra"
}'
```
Request Params:
- `models`: *list or null (optional)* - Specify the models a token has access to. If null, then token has access to all models on server.
- `duration`: *str or null (optional)* Specify the length of time the token is valid for. If null, default is set to 1 hour. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
- `metadata`: *dict or null (optional)* Pass metadata for the created token. If null defaults to {}
-Expected response:
+- `team_id`: *str or null (optional)* Specify team_id for the associated key
### Response
```python
{
@@ -76,7 +97,7 @@ Expected response:
}
```
-## Keys that don't expire
+### Keys that don't expire
Just set duration to None.
@@ -87,7 +108,7 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
--data '{"models": ["azure-models"], "aliases": {"mistral-7b": "gpt-3.5-turbo"}, "duration": null}'
```
-## Upgrade/Downgrade Models
+### Upgrade/Downgrade Models
If a user is expected to use a given model (i.e. gpt-3.5), and you want to:
@@ -137,7 +158,7 @@ curl -X POST "https://0.0.0.0:8000/key/generate" \
- **How is routing between diff keys/api bases done?** litellm handles this by shuffling between different models in the model list with the same model_name, as shown in the sketch below. [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
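For illustration only: a minimal sketch of that shuffling idea, not the actual `Router` code; `model_list` and `pick_deployment` are made-up names here.
```python
import random

# Hypothetical model list: two deployments registered under the same model_name.
model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"api_base": "https://endpoint-1"}},
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"api_base": "https://endpoint-2"}},
]

def pick_deployment(model_name: str) -> dict:
    # Gather every deployment with the requested model_name and pick one at
    # random - the "shuffling" between keys/api bases described above.
    candidates = [m for m in model_list if m["model_name"] == model_name]
    return random.choice(candidates)

print(pick_deployment("gpt-3.5-turbo")["litellm_params"]["api_base"])
```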
-## Grant Access to new model
+### Grant Access to new model
Use model access groups to give users access to select models, and add new models to the group over time (e.g. mistral, llama-2, etc.)
@@ -165,6 +186,102 @@ curl --location 'http://localhost:8000/key/generate' \
"max_budget": 0,}'
```
## /key/info
### Request
```shell
curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
-H "Authorization: Bearer sk-1234"
```
Request Params:
- key: str - The key you want the info for
### Response
`token` is the hashed key (The DB stores the hashed key for security)
```json
{
"key": "sk-02Wr4IAlN3NvPXvL5JVvDA",
"info": {
"token": "80321a12d03412c527f2bd9db5fabd746abead2e1d50b435a534432fbaca9ef5",
"spend": 0.0,
"expires": "2024-01-18T23:52:09.125000+00:00",
"models": ["azure-gpt-3.5", "azure-embedding-model"],
"aliases": {},
"config": {},
"user_id": "ishaan2@berri.ai",
"team_id": "None",
"max_parallel_requests": null,
"metadata": {}
}
}
```
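Since the response only returns the SHA-256 hash, you can reproduce the `token` value locally with the same scheme as the proxy's `hash_token` helper (added later in this commit); a small sketch:
```python
import hashlib

def hash_token(token: str) -> str:
    # SHA-256 hex digest of the raw key - the same scheme the proxy uses
    # before storing a key in the DB.
    return hashlib.sha256(token.encode()).hexdigest()

# For the example key above, this should reproduce the stored `token` value.
print(hash_token("sk-02Wr4IAlN3NvPXvL5JVvDA"))
```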
## /key/update
### Request
```shell
curl 'http://0.0.0.0:8000/key/update' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
"metadata": {"user": "ishaan@berri.ai"},
"team_id": "core-infra"
}'
```
Request Params:
- key: str - The key that needs to be updated.
- models: list or null (optional) - Specify the models a token has access to. If null, then the token has access to all models on the server.
- metadata: dict or null (optional) - Pass metadata for the updated token. If null, defaults to an empty dictionary.
- team_id: str or null (optional) - Specify the team_id for the associated key.
### Response
```json
{
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
"metadata": {
"user": "ishaan@berri.ai"
}
}
```
## /key/delete
### Request
```shell
curl 'http://0.0.0.0:8000/key/delete' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
}'
```
Request Params:
- keys: List[str] - List of keys to delete
### Response
```json
{
"deleted_keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
}
```
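Together, these endpoints cover a key's full lifecycle. A minimal sketch using Python's `requests`, assuming the local proxy on `0.0.0.0:8000` and the `sk-1234` master key from the examples above:
```python
import requests

BASE_URL = "http://0.0.0.0:8000"               # the local proxy from the examples above
HEADERS = {"Authorization": "Bearer sk-1234"}  # your master key

# 1. Generate a short-lived key scoped to a team
key = requests.post(
    f"{BASE_URL}/key/generate",
    headers=HEADERS,
    json={"models": ["gpt-3.5-turbo"], "duration": "20m", "team_id": "core-infra"},
).json()["key"]

# 2. Inspect it - spend, expiry, models, team_id
info = requests.get(
    f"{BASE_URL}/key/info", headers=HEADERS, params={"key": key}
).json()
print(info["info"]["spend"], info["info"]["team_id"])

# 3. Update its model access
requests.post(
    f"{BASE_URL}/key/update",
    headers=HEADERS,
    json={"key": key, "models": ["gpt-3.5-turbo", "gpt-4"]},
)

# 4. Delete it when it's no longer needed
requests.post(f"{BASE_URL}/key/delete", headers=HEADERS, json={"keys": [key]})
```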
## Tracking Spend
You can get spend for a key by using the `/key/info` endpoint.

View file

@@ -692,9 +692,9 @@ def completion(
or get_secret("AZURE_API_KEY")
)
-azure_ad_token = optional_params.pop("azure_ad_token", None) or get_secret(
-    "AZURE_AD_TOKEN"
-)
+azure_ad_token = optional_params.get("extra_body", {}).pop(
+    "azure_ad_token", None
+) or get_secret("AZURE_AD_TOKEN")
headers = headers or litellm.headers

View file

@@ -1,8 +1,8 @@
-from pydantic import BaseModel, Extra, Field, root_validator
+from pydantic import BaseModel, Extra, Field, root_validator, Json
-import enum, sys
+import enum
-from typing import Optional, List, Union, Dict, Literal
+from typing import Optional, List, Union, Dict, Literal, Any
from datetime import datetime
-import uuid, json
+import uuid, json, sys, os
class LiteLLMBase(BaseModel):
@@ -129,6 +129,7 @@ class GenerateKeyRequest(LiteLLMBase):
config: Optional[dict] = {}
spend: Optional[float] = 0
user_id: Optional[str] = None
team_id: Optional[str] = None
max_parallel_requests: Optional[int] = None
metadata: Optional[dict] = {}
tpm_limit: int = sys.maxsize
@@ -202,6 +203,7 @@ class DynamoDBArgs(LiteLLMBase):
user_table_name: str = "LiteLLM_UserTable"
key_table_name: str = "LiteLLM_VerificationToken"
config_table_name: str = "LiteLLM_Config"
spend_table_name: str = "LiteLLM_SpendLogs"
class ConfigGeneralSettings(LiteLLMBase):
@@ -320,3 +322,20 @@ class LiteLLM_UserTable(LiteLLMBase):
if values.get("models") is None:
    values.update({"models": []})
return values
class LiteLLM_SpendLogs(LiteLLMBase):
    request_id: str
    api_key: str
    model: Optional[str] = ""
    call_type: str
    spend: Optional[float] = 0.0
    startTime: Union[str, datetime, None]
    endTime: Union[str, datetime, None]
    user: Optional[str] = ""
    modelParameters: Optional[Json] = {}
    messages: Optional[Json] = []
    response: Optional[Json] = {}
    usage: Optional[Json] = {}
    metadata: Optional[Json] = {}
    cache_hit: Optional[str] = "False"
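For illustration, not part of the diff: a row validating against this model might be built as below, with made-up values. The pydantic `Json` fields parse JSON strings, which is why the `get_logging_payload` helper added later in this commit serializes dicts with `json.dumps` first.
```python
from datetime import datetime, timezone

# Illustrative values only - request_id/api_key/call_type are the required fields.
log = LiteLLM_SpendLogs(
    request_id="chatcmpl-abc123",
    api_key="<sha256-hashed-key>",
    call_type="acompletion",
    spend=0.00042,
    startTime=datetime.now(timezone.utc),
    endTime=datetime.now(timezone.utc),
    modelParameters='{"temperature": 0.2}',          # Json fields accept JSON strings
    messages='[{"role": "user", "content": "hi"}]',
)
print(log.spend, log.modelParameters)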

View file

@@ -131,10 +131,27 @@ class DynamoDBWrapper(CustomDB):
raise Exception(
    f"Failed to create table - {self.database_arguments.config_table_name}.\nPlease create a new table called {self.database_arguments.config_table_name}\nAND set `hash_key` as 'param_name'"
)
## Spend
try:
    verbose_proxy_logger.debug("DynamoDB Wrapper - Creating Spend Table")
    error_occurred = False
    table = client.table(self.database_arguments.spend_table_name)
    if not await table.exists():
        await table.create(
            self.throughput_type,
            KeySchema(hash_key=KeySpec("request_id", KeyType.string)),
        )
except Exception as e:
    error_occurred = True
if error_occurred:
    raise Exception(
        f"Failed to create table - {self.database_arguments.spend_table_name}.\nPlease create a new table called {self.database_arguments.spend_table_name}\nAND set `hash_key` as 'request_id'"
    )
verbose_proxy_logger.debug("DynamoDB Wrapper - Done connecting()") verbose_proxy_logger.debug("DynamoDB Wrapper - Done connecting()")
async def insert_data( async def insert_data(
self, value: Any, table_name: Literal["user", "key", "config"] self, value: Any, table_name: Literal["user", "key", "config", "spend"]
): ):
from aiodynamo.client import Client from aiodynamo.client import Client
from aiodynamo.credentials import Credentials, StaticCredentials from aiodynamo.credentials import Credentials, StaticCredentials
@ -166,6 +183,8 @@ class DynamoDBWrapper(CustomDB):
table = client.table(self.database_arguments.key_table_name) table = client.table(self.database_arguments.key_table_name)
elif table_name == "config": elif table_name == "config":
table = client.table(self.database_arguments.config_table_name) table = client.table(self.database_arguments.config_table_name)
elif table_name == "spend":
table = client.table(self.database_arguments.spend_table_name)
for k, v in value.items(): for k, v in value.items():
if isinstance(v, datetime): if isinstance(v, datetime):

View file

@@ -61,8 +61,8 @@ litellm_settings:
# setting callback class
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
-# general_settings:
+general_settings:
-#   master_key: sk-1234
+  master_key: sk-1234
# database_type: "dynamo_db"
# database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
#   "billing_mode": "PAY_PER_REQUEST",

View file

@@ -72,6 +72,7 @@ from litellm.proxy.utils import (
ProxyLogging,
_cache_user_row,
send_email,
get_logging_payload,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
import pydantic
@@ -518,6 +519,7 @@ async def track_cost_callback(
global prisma_client, custom_db_client
try:
# check if it has collected an entire stream response
verbose_proxy_logger.debug(f"Proxy: In track_cost_callback for {kwargs}")
verbose_proxy_logger.debug(
    f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
)
@@ -538,7 +540,13 @@
prisma_client is not None or custom_db_client is not None
):
await update_database(
-    token=user_api_key, response_cost=response_cost, user_id=user_id
+    token=user_api_key,
+    response_cost=response_cost,
+    user_id=user_id,
+    kwargs=kwargs,
+    completion_response=completion_response,
+    start_time=start_time,
+    end_time=end_time,
)
elif kwargs["stream"] == False: # for non streaming responses elif kwargs["stream"] == False: # for non streaming responses
response_cost = litellm.completion_cost( response_cost = litellm.completion_cost(
@ -554,13 +562,27 @@ async def track_cost_callback(
prisma_client is not None or custom_db_client is not None prisma_client is not None or custom_db_client is not None
): ):
await update_database( await update_database(
token=user_api_key, response_cost=response_cost, user_id=user_id token=user_api_key,
response_cost=response_cost,
user_id=user_id,
kwargs=kwargs,
completion_response=completion_response,
start_time=start_time,
end_time=end_time,
) )
except Exception as e:
    verbose_proxy_logger.debug(f"error in tracking cost callback - {str(e)}")
-async def update_database(token, response_cost, user_id=None):
+async def update_database(
+    token,
+    response_cost,
+    user_id=None,
+    kwargs=None,
+    completion_response=None,
+    start_time=None,
+    end_time=None,
+):
try:
verbose_proxy_logger.debug(
    f"Enters prisma db call, token: {token}; user_id: {user_id}"
)
@@ -630,9 +652,28 @@ async def update_database(token, response_cost, user_id=None):
key=token, value={"spend": new_spend}, table_name="key"
)
async def _insert_spend_log_to_db():
    # Helper to generate payload to log
    verbose_proxy_logger.debug("inserting spend log to db")
    payload = get_logging_payload(
        kwargs=kwargs,
        response_obj=completion_response,
        start_time=start_time,
        end_time=end_time,
    )
    payload["spend"] = response_cost
    if prisma_client is not None:
        await prisma_client.insert_data(data=payload, table_name="spend")
    elif custom_db_client is not None:
        await custom_db_client.insert_data(payload, table_name="spend")
tasks = []
tasks.append(_update_user_db())
tasks.append(_update_key_db())
tasks.append(_insert_spend_log_to_db())
await asyncio.gather(*tasks)
except Exception as e:
verbose_proxy_logger.debug(
@@ -1037,6 +1078,7 @@ async def generate_key_helper_fn(
max_budget: Optional[float] = None,
token: Optional[str] = None,
user_id: Optional[str] = None,
team_id: Optional[str] = None,
user_email: Optional[str] = None,
max_parallel_requests: Optional[int] = None,
metadata: Optional[dict] = {},
@@ -1084,12 +1126,15 @@
user_id = user_id or str(uuid.uuid4())
tpm_limit = tpm_limit or sys.maxsize
rpm_limit = rpm_limit or sys.maxsize
if type(team_id) is not str:
    team_id = str(team_id)
try:
# Create a new verification token (you may want to enhance this logic based on your needs)
user_data = {
"max_budget": max_budget,
"user_email": user_email,
"user_id": user_id,
"team_id": team_id,
"spend": spend,
"models": models,
"max_parallel_requests": max_parallel_requests,
@@ -1104,6 +1149,7 @@
"config": config_json,
"spend": spend,
"user_id": user_id,
"team_id": team_id,
"max_parallel_requests": max_parallel_requests,
"metadata": metadata_json,
"tpm_limit": tpm_limit,
@@ -2051,6 +2097,7 @@ async def generate_key_fn(
Parameters:
- duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). **(Default is set to 1 hour.)**
- team_id: Optional[str] - The team id of the user
- models: Optional[list] - Model names a user is allowed to call (if empty, key is allowed to call all models)
- aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
- config: Optional[dict] - any key-specific configs, overrides config in config.yaml - config: Optional[dict] - any key-specific configs, overrides config in config.yaml

View file

@@ -9,6 +9,7 @@ generator client {
model LiteLLM_UserTable {
user_id String @unique
team_id String?
max_budget Float?
spend Float @default(0.0)
user_email String?
@@ -27,6 +28,7 @@ model LiteLLM_VerificationToken {
aliases Json @default("{}")
config Json @default("{}")
user_id String?
team_id String?
max_parallel_requests Int?
metadata Json @default("{}")
tpm_limit BigInt?
@@ -36,4 +38,21 @@ model LiteLLM_VerificationToken {
model LiteLLM_Config {
param_name String @id
param_value Json?
}
model LiteLLM_SpendLogs {
  request_id      String   @unique
  call_type       String
  api_key         String   @default("")
  spend           Float    @default(0.0)
  startTime       DateTime // Assuming start_time is a DateTime field
  endTime         DateTime // Assuming end_time is a DateTime field
  model           String   @default("")
  user            String   @default("")
  modelParameters Json     @default("{}") // Assuming optional_params is a JSON field
  messages        Json     @default("[]")
  response        Json     @default("{}")
  usage           Json     @default("{}")
  metadata        Json     @default("{}")
  cache_hit       String   @default("")
}

View file

@@ -1,7 +1,12 @@
from typing import Optional, List, Any, Literal, Union
import os, subprocess, hashlib, importlib, asyncio, copy, json, aiohttp, httpx
import litellm, backoff
-from litellm.proxy._types import UserAPIKeyAuth, DynamoDBArgs, LiteLLM_VerificationToken
+from litellm.proxy._types import (
+    UserAPIKeyAuth,
+    DynamoDBArgs,
+    LiteLLM_VerificationToken,
+    LiteLLM_SpendLogs,
+)
from litellm.caching import DualCache
from litellm.proxy.hooks.parallel_request_limiter import MaxParallelRequestsHandler
from litellm.proxy.hooks.max_budget_limiter import MaxBudgetLimiter
@@ -316,7 +321,7 @@
self,
key: str,
value: Any,
-table_name: Literal["users", "keys", "config"],
+table_name: Literal["users", "keys", "config", "spend"],
):
"""
Generic implementation of get data
@@ -334,6 +339,10 @@ class PrismaClient:
response = await self.db.litellm_config.find_first(  # type: ignore
    where={key: value}  # type: ignore
)
elif table_name == "spend":
    response = await self.db.litellm_spendlogs.find_first(  # type: ignore
        where={key: value}  # type: ignore
    )
return response
except Exception as e:
asyncio.create_task(
@@ -417,7 +426,7 @@
on_backoff=on_backoff,  # specifying the function to call on backoff
)
async def insert_data(
-    self, data: dict, table_name: Literal["user", "key", "config"]
+    self, data: dict, table_name: Literal["user", "key", "config", "spend"]
):
"""
Add a key to the database. If it already exists, do nothing.
@@ -473,8 +482,18 @@
)
tasks.append(updated_table_row)
await asyncio.gather(*tasks)
elif table_name == "spend":
db_data = self.jsonify_object(data=data)
new_spend_row = await self.db.litellm_spendlogs.upsert(
where={"request_id": data["request_id"]},
data={
"create": {**db_data}, # type: ignore
"update": {}, # don't do anything if it already exists
},
)
return new_spend_row
except Exception as e:
print_verbose(f"LiteLLM Prisma Client Exception: {e}")
asyncio.create_task(
@@ -760,3 +779,85 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):
except Exception as e:
print_verbose("An error occurred while sending the email:", str(e))
def hash_token(token: str):
    import hashlib

    # Hash the string using SHA-256
    hashed_token = hashlib.sha256(token.encode()).hexdigest()
    return hashed_token
def get_logging_payload(kwargs, response_obj, start_time, end_time):
    from litellm.proxy._types import LiteLLM_SpendLogs
    from pydantic import Json
    import uuid

    if kwargs is None:
        kwargs = {}
    # standardize this function to be used across, s3, dynamoDB, langfuse logging
    litellm_params = kwargs.get("litellm_params", {})
    metadata = (
        litellm_params.get("metadata", {}) or {}
    )  # if litellm_params['metadata'] == None
    messages = kwargs.get("messages")
    optional_params = kwargs.get("optional_params", {})
    call_type = kwargs.get("call_type", "litellm.completion")
    cache_hit = kwargs.get("cache_hit", False)
    usage = response_obj["usage"]
    id = response_obj.get("id", str(uuid.uuid4()))
    api_key = metadata.get("user_api_key", "")
    if api_key is not None and type(api_key) == str:
        # hash the api_key
        api_key = hash_token(api_key)

    payload = {
        "request_id": id,
        "call_type": call_type,
        "api_key": api_key,
        "cache_hit": cache_hit,
        "startTime": start_time,
        "endTime": end_time,
        "model": kwargs.get("model", ""),
        "user": kwargs.get("user", ""),
        "modelParameters": optional_params,
        "messages": messages,
        "response": response_obj,
        "usage": usage,
        "metadata": metadata,
    }

    json_fields = [
        field
        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
        if field_type == Json or field_type == Optional[Json]
    ]
    str_fields = [
        field
        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
        if field_type == str or field_type == Optional[str]
    ]
    datetime_fields = [
        field
        for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
        if field_type == datetime
    ]

    for param in json_fields:
        if param in payload and type(payload[param]) != Json:
            # serialize pydantic response objects / plain dicts to JSON strings
            if type(payload[param]) == litellm.ModelResponse:
                payload[param] = payload[param].model_dump_json()
            elif type(payload[param]) == litellm.EmbeddingResponse:
                payload[param] = payload[param].model_dump_json()
            elif type(payload[param]) == litellm.Usage:
                payload[param] = payload[param].model_dump_json()
            else:
                payload[param] = json.dumps(payload[param])

    for param in str_fields:
        if param in payload and type(payload[param]) != str:
            payload[param] = str(payload[param])

    return payload
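For illustration, not part of the diff: a sketch of calling the helper above with made-up inputs; a plain dict stands in for a real `ModelResponse` here.
```python
from datetime import datetime, timezone

# Made-up inputs - only to illustrate the shapes this helper expects.
kwargs = {
    "model": "gpt-3.5-turbo",
    "user": "ishaan@berri.ai",
    "litellm_params": {"metadata": {"user_api_key": "sk-1234"}},  # key gets SHA-256 hashed
    "messages": [{"role": "user", "content": "hi"}],
    "optional_params": {"temperature": 0.2},
}
response_obj = {"id": "chatcmpl-abc123", "usage": {"total_tokens": 12}}

now = datetime.now(timezone.utc)
payload = get_logging_payload(kwargs, response_obj, start_time=now, end_time=now)
payload["spend"] = 0.00042  # the caller (update_database) fills this in
print(payload["request_id"], payload["api_key"])
```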

View file

@@ -268,7 +268,7 @@ def test_completion_azure_gpt4_vision():
pytest.fail(f"Error occurred: {e}")
-test_completion_azure_gpt4_vision()
+# test_completion_azure_gpt4_vision()
@pytest.mark.skip(reason="this test is flaky")
@@ -990,9 +990,9 @@ def test_azure_openai_ad_token():
print("azure ad token response\n")
print(response)
litellm.input_callback = []
-except:
+except Exception as e:
    litellm.input_callback = []
-    pass
+    pytest.fail(f"An exception occurred - {str(e)}")
# test_azure_openai_ad_token()

View file

@@ -179,6 +179,10 @@ def test_call_with_key_over_budget(custom_db_client):
# 5. Make a call with a key over budget, expect to fail
setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
from litellm._logging import verbose_proxy_logger
import logging
verbose_proxy_logger.setLevel(logging.DEBUG)
try:
async def test():

View file

@@ -543,7 +543,8 @@ def test_generate_and_update_key(prisma_client):
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
request = NewUserRequest(
-    metadata={"team": "litellm-team3", "project": "litellm-project3"}
+    metadata={"team": "litellm-team3", "project": "litellm-project3"},
+    team_id="litellm-core-infra@gmail.com",
)
key = await new_user(request)
print(key)
@@ -560,6 +561,7 @@
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"].team_id == "litellm-core-infra@gmail.com"
request = Request(scope={"type": "http"})
request._url = URL(url="/update/key")

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.18.2"
+version = "1.18.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"
@@ -61,7 +61,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
-version = "1.18.2"
+version = "1.18.3"
version_files = [
    "pyproject.toml:^version"
]

View file

@@ -9,6 +9,7 @@ generator client {
model LiteLLM_UserTable {
user_id String @unique
team_id String?
max_budget Float?
spend Float @default(0.0)
user_email String?
@@ -27,6 +28,7 @@ model LiteLLM_VerificationToken {
aliases Json @default("{}")
config Json @default("{}")
user_id String?
team_id String?
max_parallel_requests Int?
metadata Json @default("{}")
tpm_limit BigInt?
@@ -36,4 +38,21 @@ model LiteLLM_VerificationToken {
model LiteLLM_Config {
param_name String @id
param_value Json?
}
model LiteLLM_SpendLogs {
  request_id      String   @unique
  api_key         String   @default("")
  call_type       String
  spend           Float    @default(0.0)
  startTime       DateTime // Assuming start_time is a DateTime field
  endTime         DateTime // Assuming end_time is a DateTime field
  model           String   @default("")
  user            String   @default("")
  modelParameters Json     @default("{}") // Assuming optional_params is a JSON field
  messages        Json     @default("[]")
  response        Json     @default("{}")
  usage           Json     @default("{}")
  metadata        Json     @default("{}")
  cache_hit       String   @default("")
}