forked from phoenix/litellm-mirror

commit e2a161ecbf — Merge branch 'main' into litellm_selective_access

17 changed files with 561 additions and 39 deletions
@@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# VertexAI - Google [Gemini, Model Garden]

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">

@@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co

## OpenAI Proxy Usage

Here's how to use Vertex AI with the LiteLLM Proxy Server:

1. Modify the config.yaml

<Tabs>

<TabItem value="completion_param" label="Different location per model">

Use this when you need to set a different location for each vertex model.

```yaml
model_list:
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id"
      vertex_location: "us-central1"
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id2"
      vertex_location: "us-east"
```

</TabItem>

<TabItem value="litellm_param" label="One location all vertex models">

Use this when you have one vertex location for all models.

```yaml
litellm_settings:
  vertex_project: "hardy-device-38811" # Your Project ID

@@ -35,6 +66,10 @@ model_list:
      model: gemini-pro
```

</TabItem>

</Tabs>

2. Start the proxy

```bash
litellm --config /path/to/config.yaml
```
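
For illustration, a minimal sketch of calling the proxy's `gemini-vision` deployment from the config above, assuming the proxy's default port 4000 and an example virtual key `sk-1234`:

```python
# Sketch: route a request through the LiteLLM proxy to the gemini-vision
# deployment defined in config.yaml. Port and key are assumptions.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gemini-vision",  # proxy routes this to vertex_ai/gemini-1.0-pro-vision-001
    messages=[{"role": "user", "content": "Describe this image in one sentence."}],
)
print(response.choices[0].message.content)
```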

docs/my-website/docs/proxy/metrics.md (new file, 44 lines)
@@ -0,0 +1,44 @@
# 💸 GET Daily Spend, Usage Metrics

## Request Format
```shell
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
```

## Response Format
```json
{
  "daily_spend": [
    {
      "daily_spend": 7.9261938052047e+16,
      "day": "2024-02-01T00:00:00",
      "spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
      "spend_per_api_key": {
        "76": 914495704992000.0,
        "12": 905726697912000.0,
        "71": 866312628003000.0,
        "28": 865461799332000.0,
        "13": 859151538396000.0
      }
    },
    {
      "daily_spend": 7.938489251309491e+16,
      "day": "2024-02-02T00:00:00",
      "spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
      "spend_per_api_key": {
        "91": 896805036036000.0,
        "78": 889692646082000.0,
        "49": 885386687861000.0,
        "28": 873869890984000.0,
        "56": 867398637692000.0
      }
    }
  ],
  "total_spend": 200,
  "top_models": {"gpt4": 0.2, "vertexai/gemini-pro": 10},
  "top_api_keys": {"899922": 0.9, "838hcjd999seerr88": 20}
}
```
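
A minimal client-side sketch for this endpoint (assumes the proxy's default port and master key from the example above, plus the `requests` package):

```python
# Sketch: fetch /daily_metrics and print a per-day summary.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/daily_metrics",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
metrics = resp.json()

print("total spend:", metrics["total_spend"])
for day in metrics["daily_spend"]:
    print(day["day"], "->", day["daily_spend"])
```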
@@ -40,6 +40,7 @@ const sidebars = {
        "proxy/virtual_keys",
        "proxy/users",
        "proxy/ui",
        "proxy/metrics",
        "proxy/model_management",
        "proxy/health",
        "proxy/debugging",
@@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
        "log_count": num_rows,
    }
    return response_data


def _create_clickhouse_material_views(client=None, table_names=[]):
    # Create Materialized Views if they don't exist
    # Materialized Views send new inserted rows to the aggregate tables

    verbose_logger.debug("Clickhouse: Creating Materialized Views")
    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )


def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
    # Basic logging works without this - this is only used for low-latency reporting APIs
    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    # Create Aggregate Tables if they don't exist
    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64)
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return
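
The `AggregateFunction(sum, Float64)` columns above hold partial aggregation states written by `sumState(...)` in the materialized views, so reads must finalize them with `sumMerge(...)`. A hedged read-side sketch (host and port are placeholders; table and column names come from the DDL above):

```python
# Sketch: read finalized daily totals out of the aggregate table.
# sumMerge() collapses the partial states written via sumState();
# selecting DailySpend directly would return opaque state blobs.
import clickhouse_connect

client = clickhouse_connect.get_client(host="localhost", port=8123)
rows = client.query(
    """
    SELECT day, sumMerge(DailySpend) AS daily_spend
    FROM daily_aggregated_spend
    GROUP BY day
    ORDER BY day
    """
).result_rows
for day, spend in rows:
    print(day, spend)
```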
@@ -27,6 +27,151 @@ import litellm, uuid
from litellm._logging import print_verbose, verbose_logger


def create_client():
    try:
        import clickhouse_connect

        port = os.getenv("CLICKHOUSE_PORT")
        clickhouse_host = os.getenv("CLICKHOUSE_HOST")
        if clickhouse_host is not None:
            verbose_logger.debug("setting up clickhouse")
            if port is not None and isinstance(port, str):
                port = int(port)

            client = clickhouse_connect.get_client(
                host=os.getenv("CLICKHOUSE_HOST"),
                port=port,
                username=os.getenv("CLICKHOUSE_USERNAME"),
                password=os.getenv("CLICKHOUSE_PASSWORD"),
            )
            return client
        else:
            raise Exception("Clickhouse: Clickhouse host not set")
    except Exception as e:
        raise ValueError(f"Clickhouse: {e}")


def build_daily_metrics():
    click_house_client = create_client()

    # get daily spend
    daily_spend = click_house_client.query_df(
        """
        SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
        """
    )

    # get daily spend per model
    daily_spend_per_model = click_house_client.query_df(
        """
        SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
        """
    )
    new_df = daily_spend_per_model.to_dict(orient="records")
    import pandas as pd

    df = pd.DataFrame(new_df)
    # Group by 'day' and build a {model: spend} dictionary for each group
    result_dict = {}
    for day, group in df.groupby("day"):
        models = group["model"].tolist()
        spend = group["daily_spend"].tolist()
        spend_per_model = {model: spend for model, spend in zip(models, spend)}
        result_dict[day] = spend_per_model

    # get daily spend per API key (top 5 keys per day)
    daily_spend_per_api_key = click_house_client.query_df(
        """
        SELECT
            daily_spend,
            day,
            api_key
        FROM (
            SELECT
                sumMerge(DailySpend) as daily_spend,
                day,
                api_key,
                RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
            FROM
                daily_aggregated_spend_per_api_key
            GROUP BY
                day,
                api_key
        ) AS ranked_api_keys
        WHERE
            spend_rank <= 5
            AND day IS NOT NULL
        ORDER BY
            day,
            daily_spend DESC
        """
    )
    new_df = daily_spend_per_api_key.to_dict(orient="records")

    df = pd.DataFrame(new_df)
    # Group by 'day' and build a {api_key: spend} dictionary for each group
    api_key_result_dict = {}
    for day, group in df.groupby("day"):
        api_keys = group["api_key"].tolist()
        spend = group["daily_spend"].tolist()
        spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
        api_key_result_dict[day] = spend_per_api_key

    # Calculate total spend across all days
    total_spend = daily_spend["daily_spend"].sum()

    # Identify top models and top API keys with the highest spend across all days
    top_models = {}
    top_api_keys = {}

    for day, spend_per_model in result_dict.items():
        for model, model_spend in spend_per_model.items():
            if model not in top_models or model_spend > top_models[model]:
                top_models[model] = model_spend

    for day, spend_per_api_key in api_key_result_dict.items():
        for api_key, api_key_spend in spend_per_api_key.items():
            if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
                top_api_keys[api_key] = api_key_spend

    # for each day in daily spend, look up the day in result_dict and api_key_result_dict
    result = []
    for index, row in daily_spend.iterrows():
        day = row["day"]
        data_day = row.to_dict()

        # Look up spend per model for this day
        if day in result_dict:
            spend_per_model = result_dict[day]
            data_day["spend_per_model"] = spend_per_model

        # Look up spend per API key for this day
        if day in api_key_result_dict:
            spend_per_api_key = api_key_result_dict[day]
            data_day["spend_per_api_key"] = spend_per_api_key

        result.append(data_day)

    data_to_return = {}
    data_to_return["daily_spend"] = result
    data_to_return["total_spend"] = total_spend
    data_to_return["top_models"] = top_models
    data_to_return["top_api_keys"] = top_api_keys
    return data_to_return


# build_daily_metrics()


def _start_clickhouse():
    import clickhouse_connect

@@ -86,6 +231,14 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

    # RUN Enterprise Clickhouse Setup
    # TL;DR: for Enterprise, we create views / aggregate tables for low-latency reporting APIs
    from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
    from litellm.proxy.enterprise.utils import _create_clickhouse_material_views

    _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
    _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes
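
For context, a hedged sketch of exercising this module locally (the env var names come from `create_client` above; host and credentials are placeholders):

```python
# Sketch: point the Clickhouse integration at a local server, then build metrics.
import os

os.environ["CLICKHOUSE_HOST"] = "localhost"
os.environ["CLICKHOUSE_PORT"] = "8123"
os.environ["CLICKHOUSE_USERNAME"] = "default"
os.environ["CLICKHOUSE_PASSWORD"] = ""

from litellm.integrations import clickhouse

metrics = clickhouse.build_daily_metrics()
print(metrics["total_spend"], metrics["top_models"])
```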
@@ -278,7 +278,11 @@ def completion(
        import google.auth

        ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
        print_verbose(
            f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
        )
        creds, _ = google.auth.default(quota_project_id=vertex_project)
        print_verbose(f"VERTEX AI: creds={creds}")
        vertexai.init(
            project=vertex_project, location=vertex_location, credentials=creds
        )
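
For context, the per-request parameters that feed this code path can be passed straight through `litellm.completion` (a sketch; project, location, and prompt are placeholders, and per-call passthrough of these params is an assumption):

```python
# Sketch: call Vertex AI with an explicit project/location, which this hunk
# forwards to google.auth.default(quota_project_id=...) and vertexai.init().
import litellm

response = litellm.completion(
    model="vertex_ai/gemini-1.0-pro-vision-001",
    messages=[{"role": "user", "content": "hi"}],
    vertex_project="project-id",
    vertex_location="us-central1",
)
print(response.choices[0].message.content)
```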
@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -43,7 +43,7 @@ model_list:
      api_key: os.environ/OPENAI_API_KEY
litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
-  success_callback: ['langfuse']
+  success_callback: ['clickhouse', 'langfuse']
  # setting callback class
  # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
@@ -240,6 +240,8 @@ health_check_results = {}
queue: List = []
litellm_proxy_budget_name = "litellm-proxy-budget"
ui_access_mode: Literal["admin", "all"] = "all"
proxy_budget_rescheduler_min_time = 597
proxy_budget_rescheduler_max_time = 605
### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
### REDIS QUEUE ###

@@ -1407,7 +1409,7 @@ class ProxyConfig:
        """
        Load config values into proxy global state
        """
-        global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, ui_access_mode
+        global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode

        # Load existing config
        config = await self.get_config(config_file_path=config_file_path)

@@ -1718,6 +1720,13 @@ class ProxyConfig:
            ui_access_mode = general_settings.get(
                "ui_access_mode", "all"
            )  # can be either ["admin_only" or "all"]
            ## BUDGET RESCHEDULER ##
            proxy_budget_rescheduler_min_time = general_settings.get(
                "proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
            )
            proxy_budget_rescheduler_max_time = general_settings.get(
                "proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
            )
            ### BACKGROUND HEALTH CHECKS ###
            # Enable background health checks
            use_background_health_checks = general_settings.get(

@@ -2120,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
    try:
        start_time = time.time()
        async for chunk in response:
            verbose_proxy_logger.debug(f"returned chunk: {chunk}")
            assert isinstance(chunk, litellm.ModelResponse)
+            chunk = chunk.model_dump_json(exclude_none=True)
            try:
-                yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
+                yield f"data: {chunk}\n\n"
            except Exception as e:
                yield f"data: {str(e)}\n\n"

@@ -2202,7 +2210,7 @@ def parse_cache_control(cache_control):

@router.on_event("startup")
async def startup_event():
-    global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings
+    global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
    import json

    ### LOAD MASTER KEY ###

@@ -2313,7 +2321,7 @@ async def startup_event():
    if prisma_client is not None:
        scheduler = AsyncIOScheduler()
        interval = random.randint(
-            597, 605
+            proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
        )  # random interval, so multiple workers avoid resetting budget at the same time
        scheduler.add_job(
            reset_budget, "interval", seconds=interval, args=[prisma_client]

@@ -3839,13 +3847,55 @@ async def view_spend_logs(
        # getting spend logs from clickhouse
        from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse

-        return await view_spend_logs_from_clickhouse(
-            api_key=api_key,
-            user_id=user_id,
-            request_id=request_id,
+        daily_metrics = await view_daily_metrics(
            start_date=start_date,
            end_date=end_date,
        )

        # get the top api keys across all daily_metrics
        top_api_keys = {}  # type: ignore

        # make this compatible with the admin UI
        for response in daily_metrics.get("daily_spend", {}):
            response["startTime"] = response["day"]
            response["spend"] = response["daily_spend"]
            response["models"] = response["spend_per_model"]
            response["users"] = {"ishaan": 0.0}
            spend_per_api_key = response["spend_per_api_key"]

            # insert spend_per_api_key key, values in response
            for key, value in spend_per_api_key.items():
                response[key] = value
                top_api_keys[key] = top_api_keys.get(key, 0.0) + value

            del response["day"]
            del response["daily_spend"]
            del response["spend_per_model"]
            del response["spend_per_api_key"]

        # get top 5 api keys
        top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True)  # type: ignore
        top_api_keys = top_api_keys[:5]  # type: ignore
        top_api_keys = dict(top_api_keys)  # type: ignore
        """
        set it like this
        {
            "key": key,
            "spend": spend
        }
        """
        # we need this to show on the Admin UI
        response_keys = []
        for key in top_api_keys.items():
            response_keys.append(
                {
                    "key": key[0],
                    "spend": key[1],
                }
            )
        daily_metrics["top_api_keys"] = response_keys

        return daily_metrics
    global prisma_client
    try:
        verbose_proxy_logger.debug("inside view_spend_logs")

@@ -3998,6 +4048,61 @@ async def view_spend_logs(
    )


@router.get(
    "/daily_metrics",
    summary="Get daily spend metrics",
    tags=["budget & spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
)
async def view_daily_metrics(
    start_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time from which to start viewing key spend",
    ),
    end_date: Optional[str] = fastapi.Query(
        default=None,
        description="Time till which to view key spend",
    ),
):
    """ """
    try:
        if os.getenv("CLICKHOUSE_HOST") is not None:
            # getting spend logs from clickhouse
            from litellm.integrations import clickhouse

            return clickhouse.build_daily_metrics()

            # create a response object
            """
            {
                "date": "2022-01-01",
                "spend": 0.0,
                "users": {},
                "models": {},
            }
            """
        else:
            raise Exception(
                "Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
            )
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
                type="internal_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="/spend/logs Error" + str(e),
            type="internal_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


#### USER MANAGEMENT ####
@router.post(
    "/user/new",
@@ -130,6 +130,8 @@ def test_vertex_ai():
            f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
        )
        assert response.choices[0].finish_reason in litellm._openai_finish_reasons
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

@@ -183,6 +185,8 @@ def test_vertex_ai_stream():
            assert type(content) == str
            # pass
        assert len(completed_str) > 4
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@@ -205,18 +205,18 @@ def map_finish_reason(

class FunctionCall(OpenAIObject):
    arguments: str
-    name: str
+    name: Optional[str] = None


class Function(OpenAIObject):
    arguments: str
-    name: str
+    name: Optional[str] = None


class ChatCompletionDeltaToolCall(OpenAIObject):
-    id: str
+    id: Optional[str] = None
    function: Function
-    type: str
+    type: Optional[str] = None
    index: int

@@ -275,13 +275,19 @@ class Delta(OpenAIObject):
        super(Delta, self).__init__(**params)
        self.content = content
        self.role = role
-        self.function_call = function_call
-        if tool_calls is not None and isinstance(tool_calls, dict):
+        if function_call is not None and isinstance(function_call, dict):
+            self.function_call = FunctionCall(**function_call)
+        else:
+            self.function_call = function_call
+        if tool_calls is not None and isinstance(tool_calls, list):
            self.tool_calls = []
            for tool_call in tool_calls:
-                if tool_call.get("index", None) is None:
-                    tool_call["index"] = 0
-                self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
+                if isinstance(tool_call, dict):
+                    if tool_call.get("index", None) is None:
+                        tool_call["index"] = 0
+                    self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
+                elif isinstance(tool_call, ChatCompletionDeltaToolCall):
+                    self.tool_calls.append(tool_call)
        else:
            self.tool_calls = tool_calls

@@ -8728,7 +8734,7 @@ class CustomStreamWrapper:
                    or original_chunk.choices[0].delta.tool_calls is not None
                ):
                    try:
-                        delta = dict(original_chunk.choices[0].delta)
+                        delta = original_chunk.choices[0].delta
                        model_response.system_fingerprint = (
                            original_chunk.system_fingerprint
                        )

@@ -8763,7 +8769,9 @@ class CustomStreamWrapper:
                                    is None
                                ):
                                    t.function.arguments = ""
-                        model_response.choices[0].delta = Delta(**delta)
+                        _json_delta = delta.model_dump()
+                        print_verbose(f"_json_delta: {_json_delta}")
+                        model_response.choices[0].delta = Delta(**_json_delta)
                    except Exception as e:
                        traceback.print_exc()
                        model_response.choices[0].delta = Delta()
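
A hedged illustration of what the updated `Delta` now accepts (class names from the hunk above; values are placeholders):

```python
# Sketch: Delta coerces dict-shaped function_call / tool_calls into their
# pydantic objects, defaulting a missing tool-call "index" to 0.
from litellm.utils import Delta

delta = Delta(
    role="assistant",
    function_call={"name": "get_weather", "arguments": '{"city": "Paris"}'},
    tool_calls=[
        {
            "id": "call_1",
            "type": "function",
            "function": {"name": "get_weather", "arguments": "{}"},
        }
    ],
)
print(delta.tool_calls[0].index)  # -> 0, filled in by the new handling
```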
@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -40,6 +40,8 @@ litellm_settings:
  budget_duration: 30d
general_settings:
  master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
  proxy_budget_rescheduler_min_time: 30
  proxy_budget_rescheduler_max_time: 60
  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy

environment_variables:
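
These two settings bound the random interval each proxy worker picks for its budget-reset job (see the `random.randint` call in the startup hunk above). A hedged sketch of the effective behavior:

```python
# Sketch: each worker schedules reset_budget at a random interval within
# [min, max] seconds, so multiple workers don't reset budgets simultaneously.
import random

proxy_budget_rescheduler_min_time = 30  # from general_settings
proxy_budget_rescheduler_max_time = 60

interval = random.randint(
    proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
)
print(f"reset_budget runs every {interval}s on this worker")
```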
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.27.12"
+version = "1.27.14"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.27.12"
+version = "1.27.14"
version_files = [
    "pyproject.toml:^version"
]
@@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching
numpy==1.24.3 # semantic caching
+pandas==2.1.1 # for viewing clickhouse spend analytics
prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions
google-generativeai==0.3.2 # for vertex ai calls
@@ -449,7 +449,7 @@ async def test_key_with_budgets():
        reset_at_init_value = key_info["info"]["budget_reset_at"]
        reset_at_new_value = None
        i = 0
-        await asyncio.sleep(610)
+        await asyncio.sleep(120)
        while i < 3:
            key_info = await get_key_info(session=session, get_key=key, call_key=key)
            reset_at_new_value = key_info["info"]["budget_reset_at"]
@@ -172,20 +172,32 @@ const UsagePage: React.FC<UsagePageProps> = ({
        startTime,
        endTime
      ).then(async (response) => {
-        const topKeysResponse = await keyInfoCall(
-          accessToken,
-          getTopKeys(response)
-        );
-        const filtered_keys = topKeysResponse["info"].map((k: any) => ({
-          key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
-            0,
-            7
-          ),
-          spend: k["spend"],
-        }));
-        setTopKeys(filtered_keys);
-        setTopUsers(getTopUsers(response));
-        setKeySpendData(response);
+        console.log("result from spend logs call", response);
+        if ("daily_spend" in response) {
+          // this is from clickhouse analytics
+          let daily_spend = response["daily_spend"];
+          console.log("daily spend", daily_spend);
+          setKeySpendData(daily_spend);
+          let topApiKeys = response.top_api_keys;
+          setTopKeys(topApiKeys);
+        } else {
+          const topKeysResponse = await keyInfoCall(
+            accessToken,
+            getTopKeys(response)
+          );
+          const filtered_keys = topKeysResponse["info"].map((k: any) => ({
+            key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
+              0,
+              7
+            ),
+            spend: k["spend"],
+          }));
+          setTopKeys(filtered_keys);
+          setTopUsers(getTopUsers(response));
+          setKeySpendData(response);
+        }
      });
    } catch (error) {
      console.error("There was an error fetching the data", error);